Esempio n. 1
0
        /// <summary>
        /// Samples up to 1000 filtered peptide-spectrum matches, matches their theoretical fragment ions
        /// against the peaks of the source spectra, and fills the per-ion-series percent-TIC,
        /// percent-peak-count and mean m/z error graphs.
        /// </summary>
        /// <remarks>
        /// UI controls are read and the summary curves are added through <c>Invoke</c>, so this method
        /// appears intended to run off the UI thread (TODO confirm against the caller).
        /// </remarks>
        /// <exception cref="FileNotFoundException">A spectrum source could not be located.</exception>
        private void getFragmentationStatistics()
        {
            IList <object[]> queryRows;

            lock (session)
            {
                // pick at most 1000 random PSM ids that pass the current view filter
                var randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                                .List <long>()
                                .Shuffle()
                                .Take(1000)
                                .OrderBy(o => o);
                string randomIdSet = String.Join(",", randomIds.Select(o => o.ToString()).ToArray());
                queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                                "FROM PeptideSpectrumMatch psm " +
                                                "LEFT JOIN psm.Modifications pm " +
                                                "LEFT JOIN pm.Modification mod " +
                                                "WHERE psm.Id IN (" + randomIdSet + ") " +
                                                "GROUP BY psm.Spectrum.id ")
                            .List <object[]>();
            }
            // sorting by source name lets the loop below open each source file only once
            var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

            var percentTicBySpectrumByFragmentType       = new List <PointPairList>();
            var percentPeakCountBySpectrumByFragmentType = new List <PointPairList>();
            var meanMzErrorBySpectrumByFragmentType      = new List <PointPairList>();
            var percentTicListByFragmentType             = new List <List <double> >();
            var percentPeakCountListByFragmentType       = new List <List <double> >();
            var meanMzErrorListByFragmentType            = new List <List <double> >();

            foreach (var graphControl in graphControls)
            {
                graphControl.MasterPane.PaneList.ForEach(o => o.CurveList.ForEach(c => c.Clear()));
            }

            // one pane per ion series (pane index i + 1); curve 3 of each pane receives the per-spectrum points
            for (int i = 0; i < (int)IonSeries.Count; ++i)
            {
                percentTicBySpectrumByFragmentType.Add(percentTicGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                percentPeakCountBySpectrumByFragmentType.Add(percentPeakCountGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                meanMzErrorBySpectrumByFragmentType.Add(meanMzErrorGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                percentTicListByFragmentType.Add(new List <double>());
                percentPeakCountListByFragmentType.Add(new List <double>());
                meanMzErrorListByFragmentType.Add(new List <double>());
            }

            int spectraCount = 0;

            maxPercentTic       = 10;
            maxPercentPeakCount = 10;
            maxMeanMzError      = 0.1;
            var tolerance = fragmentTolerance;

            string spectrumListFilters = String.Empty;

            // read the user-entered tolerance and spectrum filters through Invoke
            Invoke(new MethodInvoker(() =>
            {
                tolerance.value = Convert.ToDouble(fragmentToleranceTextBox.Text);
                tolerance.units = (MZTolerance.Units)fragmentToleranceUnitsComboBox.SelectedIndex;
                meanMzErrorGraphControl.GraphPane.YAxis.Title.Text = "Mean m/z error (" + tolerance.units.ToString() + ")";

                spectrumListFilters = spectrumFiltersTextBox.Text;
                setAutomaticScales();
            }));

            string currentSourceName = null;

            // typed as MSDataFile so the open source can be disposed when the source changes and on exit
            pwiz.CLI.msdata.MSDataFile msd = null;

            // a single generator for the whole run; creating one per spectrum can repeat the same jitter sequence
            var rng = new Random();

            try
            {
                lock (owner)
                    foreach (var row in spectrumRows)
                    {
                        if (row.SourceName != currentSourceName)
                        {
                            // release the previous source before opening the next one
                            if (msd != null)
                            {
                                msd.Dispose();
                                msd = null;
                            }

                            currentSourceName = row.SourceName;
                            string currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());
                            if (String.IsNullOrEmpty(currentSourcePath))
                            {
                                throw new FileNotFoundException("source file not found");
                            }
                            msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                            //var param = session.Query<AnalysisParameter>().Where(o => o.Name == "SpectrumListFilters").Min(o => o.Value);
                            //string spectrumListFilters = String.IsNullOrEmpty(param) ? String.Empty : param;
                            SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                        }

                        string spectrumId = String.Format("{0}/{1}", row.SourceName, msdata.id.abbreviate(row.Spectrum.NativeID));

                        var spectrumList = msd.run.spectrumList;

                        ++spectraCount;

                        var pwizPeptide = new proteome.Peptide(row.ModifiedSequence, proteome.ModificationParsing.ModificationParsing_Auto,
                                                               proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
                        var fragmentation = pwizPeptide.fragmentation(true, true);

                        var    pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
                        var    pointMap     = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));
                        double tic          = pointMap.Values.Sum();

                        // an empty or zero-intensity spectrum would turn the percentages below into NaN (0/0),
                        // and Math.Max would then carry that NaN into the running maxima; skip such spectra
                        if (pointMap.Count == 0 || tic <= 0)
                        {
                            continue;
                        }

                        var percentTicByFragmentType       = new List <double>(Enumerable.Repeat(0.0, (int)IonSeries.Count));
                        var percentPeakCountByFragmentType = new List <double>(Enumerable.Repeat(0.0, (int)IonSeries.Count));
                        var matchCountByFragmentType       = new List <int>(Enumerable.Repeat(0, (int)IonSeries.Count));
                        var meanMzErrorByFragmentType      = new List <double>(Enumerable.Repeat(Double.NaN, (int)IonSeries.Count));

                        seems.PointMap.Enumerator itr;
                        double      expected;
                        IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast <IonSeries>().Where(o => o != IonSeries.Count).ToArray();

                        // only charge state 1 is considered
                        for (int z = 1; z <= 1; ++z)
                        {
                            for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                            {
                                foreach (IonSeries series in ionSeries)
                                {
                                    // c and x ions are skipped at full peptide length
                                    if ((series == IonSeries.c || series == IonSeries.x) &&
                                        length == pwizPeptide.sequence.Length)
                                    {
                                        continue;
                                    }

                                    expected = fragmentMass(fragmentation, series, length, z);
                                    // expected - (expected - tolerance) yields the tolerance as a plain number at this m/z
                                    itr      = pointMap.FindNear(expected, expected - (expected - tolerance));
                                    if (itr != null && itr.IsValid)
                                    {
                                        percentTicByFragmentType[(int)series] += itr.Current.Value;
                                        ++percentPeakCountByFragmentType[(int)series];
                                        ++matchCountByFragmentType[(int)series];
                                        // NaN marks "no match yet"; reset to 0 before accumulating the first error
                                        if (Double.IsNaN(meanMzErrorByFragmentType[(int)series]))
                                        {
                                            meanMzErrorByFragmentType[(int)series] = 0;
                                        }
                                        meanMzErrorByFragmentType[(int)series] += mzError(itr.Current.Key, expected);
                                    }
                                }
                            }
                        }

                        for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
                        {
                            // convert sum to mean
                            if (percentPeakCountByFragmentType[i] > 0)
                            {
                                meanMzErrorByFragmentType[i] /= matchCountByFragmentType[i];
                            }

                            // convert to percentages
                            percentTicByFragmentType[i]       /= tic / 100;
                            percentPeakCountByFragmentType[i] /= pointMap.Count / 100.0;

                            maxPercentTic       = Math.Max(maxPercentTic, percentTicByFragmentType[i]);
                            maxPercentPeakCount = Math.Max(maxPercentPeakCount, percentPeakCountByFragmentType[i]);

                            // random x offset in [-0.5, 0.5) so the points of one pane do not overlap
                            double jitter = (rng.NextDouble() - 0.5);
                            percentTicBySpectrumByFragmentType[i].Add(jitter, percentTicByFragmentType[i], String.Format("{0}: {1:G4}% ({2} matches)", spectrumId, percentTicByFragmentType[i], matchCountByFragmentType[i]));
                            percentPeakCountBySpectrumByFragmentType[i].Add(jitter, percentPeakCountByFragmentType[i], String.Format("{0}: {1:G4}% ({2} matches)", spectrumId, percentPeakCountByFragmentType[i], matchCountByFragmentType[i]));

                            percentTicListByFragmentType[i].Add(percentTicByFragmentType[i]);
                            percentPeakCountListByFragmentType[i].Add(percentPeakCountByFragmentType[i]);

                            if (!Double.IsNaN(meanMzErrorByFragmentType[i]))
                            {
                                maxMeanMzError = Math.Max(maxMeanMzError, Math.Abs(meanMzErrorByFragmentType[i]));
                                // NOTE(review): the label appends '%' although the axis is titled in tolerance units - confirm intended
                                meanMzErrorBySpectrumByFragmentType[i].Add(jitter, meanMzErrorByFragmentType[i], String.Format("{0}: {1:G4}%", spectrumId, meanMzErrorByFragmentType[i]));
                                meanMzErrorListByFragmentType[i].Add(meanMzErrorByFragmentType[i]);
                            }
                        }

                        // NOTE(review): unlike the call above, this one is not wrapped in Invoke - confirm
                        // setAutomaticScales marshals to the UI thread itself
                        if ((spectraCount % 100) == 0)
                        {
                            setAutomaticScales();
                        }
                    }// for each spectrum row
            }
            finally
            {
                // release the last opened source even when the loop throws
                if (msd != null)
                {
                    msd.Dispose();
                }
            }

            Invoke(new MethodInvoker(() =>
            {
                for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
                {
                    // summaries are only drawn for series with at least 5 values
                    if (percentTicListByFragmentType[i].Count < 5)
                    {
                        continue;
                    }
                    percentTicListByFragmentType[i].Sort();
                    percentPeakCountListByFragmentType[i].Sort();
                    addSixNumberSummary(percentTicGraphControl.MasterPane.PaneList[i + 1], percentTicListByFragmentType[i]);
                    addSixNumberSummary(percentPeakCountGraphControl.MasterPane.PaneList[i + 1], percentPeakCountListByFragmentType[i]);

                    if (meanMzErrorListByFragmentType[i].Count < 5)
                    {
                        continue;
                    }
                    meanMzErrorListByFragmentType[i].Sort();
                    addSixNumberSummary(meanMzErrorGraphControl.MasterPane.PaneList[i + 1], meanMzErrorListByFragmentType[i]);
                }
            }));
        }
Esempio n. 2
0
        /// <summary>
        /// Samples up to 1000 filtered peptide-spectrum matches and adds every peak of their spectra to the
        /// precursor and charge-reduced-precursor scatter plots, with x relative to the respective precursor
        /// m/z and y as the fraction of the spectrum's total ion current. Peaks matching a theoretical
        /// fragment are additionally added with their ion series index and a descriptive label.
        /// </summary>
        /// <remarks>
        /// UI controls are read and refreshed through <c>Invoke</c>, so this method appears intended to run
        /// off the UI thread (TODO confirm against the caller).
        /// </remarks>
        /// <returns>Always a new, empty list.</returns>
        /// <exception cref="System.IO.FileNotFoundException">A spectrum source could not be located.</exception>
        private List <double> getPeakStatistics()
        {
            IList <object[]> queryRows;

            lock (session)
            {
                // pick at most 1000 random PSM ids that pass the current view filter
                var randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                                .List <long>()
                                .Shuffle()
                                .Take(1000)
                                .OrderBy(o => o);
                string randomIdSet = String.Join(",", randomIds.Select(o => o.ToString()).ToArray());
                queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                                "FROM PeptideSpectrumMatch psm " +
                                                "LEFT JOIN psm.Modifications pm " +
                                                "LEFT JOIN pm.Modification mod " +
                                                "WHERE psm.Id IN (" + randomIdSet + ") " +
                                                "GROUP BY psm.Spectrum.id ")
                            .List <object[]>();
            }
            // sorting by source name lets the loop below open each source file only once
            var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

            precursorScatterPlot.Clear();
            chargeReducedScatterPlot.Clear();

            int spectraCount = 0;

            string spectrumListFilters = String.Empty;

            Invoke(new MethodInvoker(() =>
            {
                spectrumListFilters = spectrumFiltersTextBox.Text;
                zedGraphControl.MasterPane.AxisChange();
                zedGraphControl.Refresh();
            }));

            string currentSourceName = null;

            // typed as MSDataFile so the open source can be disposed when the source changes and on exit
            pwiz.CLI.msdata.MSDataFile msd = null;

            try
            {
                lock (owner)
                    foreach (var row in spectrumRows)
                    {
                        if (row.SourceName != currentSourceName)
                        {
                            // release the previous source before opening the next one
                            if (msd != null)
                            {
                                msd.Dispose();
                                msd = null;
                            }

                            currentSourceName = row.SourceName;
                            string currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());
                            // fail with a clear message instead of handing an empty path to MSDataFile
                            if (String.IsNullOrEmpty(currentSourcePath))
                            {
                                throw new System.IO.FileNotFoundException("source file not found");
                            }
                            msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                            //var param = session.Query<AnalysisParameter>().Where(o => o.Name == "SpectrumListFilters").Min(o => o.Value);
                            //string spectrumListFilters = String.IsNullOrEmpty(param) ? String.Empty : param;
                            SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                        }

                        string label = String.Format("{0}/{1}\n{2}", row.SourceName, msdata.id.abbreviate(row.Spectrum.NativeID), row.ModifiedSequence);

                        var spectrumList = msd.run.spectrumList;

                        ++spectraCount;

                        var pwizPeptide = new proteome.Peptide(row.ModifiedSequence, proteome.ModificationParsing.ModificationParsing_Auto,
                                                               proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
                        var fragmentation = pwizPeptide.fragmentation(true, true);

                        var    pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
                        var    pointMap     = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));
                        double tic          = pointMap.Values.Sum();

                        double precursorMz = row.Spectrum.PrecursorMZ;
                        double chargeReducedPrecursorMz = precursorMz * row.PeptideSpectrumMatch.Charge;

                        // fixed switches: matched peaks are plotted and are left in the point map
                        bool plotMatchedPeaks   = true;
                        bool removeMatchedPeaks = false;

                        double tolerance = 0.03;
                        seems.PointMap.Enumerator itr;
                        IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast <IonSeries>().Where(o => o != IonSeries.Count).ToArray();

                        // only charge state 1 is considered
                        for (int z = 1; z <= 1; ++z)
                        {
                            for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                            {
                                string NTermFragment = row.ModifiedSequence.Substring(0, length);

                                foreach (IonSeries series in ionSeries)
                                {
                                    // c, c-1 and x ions are skipped at full peptide length
                                    if ((series == IonSeries.c || series == IonSeries.cMinus1 || series == IonSeries.x) &&
                                        length == pwizPeptide.sequence.Length)
                                    {
                                        continue;
                                    }

                                    itr = pointMap.FindNear(fragmentMass(fragmentation, series, length, z), tolerance);
                                    if (itr != null && itr.IsValid)
                                    {
                                        if (plotMatchedPeaks)
                                        {
                                            precursorScatterPlot.AddPoint(new PointPair(itr.Current.Key - precursorMz, itr.Current.Value / tic, (int)series, String.Format("{0} {1}\n{2} {3} {4} {5}", label, precursorMz, NTermFragment, itr.Current.Key, IonSeriesLabels[(int)series], length)));
                                            chargeReducedScatterPlot.AddPoint(new PointPair(itr.Current.Key - chargeReducedPrecursorMz, itr.Current.Value / tic, (int)series, String.Format("{0} {1}\n{2} {3} {4} {5}", label, chargeReducedPrecursorMz, NTermFragment, itr.Current.Key, IonSeriesLabels[(int)series], length)));
                                        }

                                        if (removeMatchedPeaks)
                                        {
                                            pointMap.Remove(itr);
                                        }
                                    }
                                }
                            }
                        }

                        // every peak still in the map is plotted with series index 0 and the plain label
                        foreach (var pair in pointMap)
                        {
                            precursorScatterPlot.AddPoint(new PointPair(pair.Key - precursorMz, pair.Value / tic, 0, label));
                            chargeReducedScatterPlot.AddPoint(new PointPair(pair.Key - chargeReducedPrecursorMz, pair.Value / tic, 0, label));
                        }

                        // refresh the graph every 100 spectra
                        if ((spectraCount % 100) == 0)
                        {
                            Invoke(new MethodInvoker(() =>
                            {
                                zedGraphControl.MasterPane.AxisChange();
                                zedGraphControl.Refresh();
                            }));
                        }
                    }
            }
            finally
            {
                // release the last opened source even when the loop throws
                if (msd != null)
                {
                    msd.Dispose();
                }
            }

            Invoke(new MethodInvoker(() =>
            {
                if (!lockZoomCheckBox.Checked)
                {
                    zedGraphControl.ZoomOutAll(zedGraphControl.GraphPane);
                }
                zedGraphControl.MasterPane.AxisChange();
                zedGraphControl.Refresh();
            }));
            return(new List <double>()); //percentTicBySpectrumByFragmentType[1];
        }
Esempio n. 3
0
        /// <summary>
        /// Reads the peaks for every row of one spectrum source, builds the PhosphoRS input items and runs
        /// PhosphoRS site localization on them.
        /// </summary>
        /// <param name="sourceFilepath">Path of the spectrum source file to open.</param>
        /// <param name="currentSource">Number of this source; used in status messages and the progress percentage.</param>
        /// <param name="totalSources">Total number of sources; used in status messages and the progress percentage.</param>
        /// <param name="config">PhosphoRS settings; a spectrum type of None means the type is detected per spectrum from the file.</param>
        /// <param name="phosphoRows">Rows to process; the Peaks and SpectrumType of each row value are filled in.</param>
        /// <returns>The PTM localization result, or null when cancellation was requested.</returns>
        /// <exception cref="InvalidDataException">
        /// With automatic spectrum type detection, a spectrum has an unmapped dissociation method or none at all.
        /// </exception>
        private phosphoRS.PTMResultClass RunOnSource(string sourceFilepath, int currentSource, int totalSources, PhosphoRSConfig config, IDictionary<long, PhosphoPeptideAttestationRow> phosphoRows)
        {
            var msd = new pwiz.CLI.msdata.MSDataFile(sourceFilepath);

            // the outer try/finally releases the source file on every exit path: cancellation inside the
            // row loop, an exception while reading spectra, or normal completion
            try
            {
                var spectrumList = msd.run.spectrumList;

                int rowNumber = 0;
                int totalRows = phosphoRows.Count;
                items.Clear();

                var spectrumTypes = new Set<CVID>();

                foreach (var row in phosphoRows)
                {
                    // check for cancellation and report progress on the first row and every 100th row after it
                    if ((rowNumber % 100) == 0)
                    {
                        if (cancelAttestation.IsCancellationRequested)
                        {
                            this.progressBar.ProgressBar.Visible = false;
                            _bgWorkerCancelled = true;
                            setProgress(-1, "Cancelled.");
                            return null;
                        }

                        if (rowNumber == 0)
                            setStatus(String.Format("Reading peaks and creating PhosphoRS objects for source {0} of {1} ({2}): {3} spectra\r\n", currentSource, totalSources, Path.GetFileName(sourceFilepath), totalRows));

                        // multiply before dividing: integer division first would truncate to 0 until the last row
                        setProgress((rowNumber + 1) * 100 / totalRows, String.Format("Reading peaks and creating PhosphoRS objects for source {0} of {1} ({2}): {3}/{4} spectra", currentSource, totalSources, Path.GetFileName(sourceFilepath), rowNumber + 1, totalRows));
                    }

                    var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Value.SpectrumNativeID), true); //may create indexoutofrange error if no spectrum nativeID

                    var OriginalMZs = pwizSpectrum.getMZArray().data; //getMZArray().data returns IList<double>
                    var OriginalIntensities = pwizSpectrum.getIntensityArray().data;
                    row.Value.Peaks = new phosphoRS.Peak[OriginalMZs.Count];
                    for (int i = 0; i < OriginalMZs.Count; ++i)
                        row.Value.Peaks[i] = new phosphoRS.Peak(OriginalMZs[i], OriginalIntensities[i]);

                    if (config.spectrumType == phosphoRS.SpectrumType.None)
                    {
                        // no type configured: derive it from the dissociation methods of the spectrum's precursors
                        row.Value.SpectrumType = phosphoRS.SpectrumType.None;
                        foreach (var precursor in pwizSpectrum.precursors)
                            foreach (var method in precursor.activation.cvParamChildren(CVID.MS_dissociation_method))
                            {
                                // if dissociation method is set to "Auto" but could not be determined from the file, alert the user
                                if (!spectrumTypeByDissociationMethod.Contains(method.cvid))
                                    throw new InvalidDataException("cannot handle unmapped dissociation method \"" + CV.cvTermInfo(method.cvid).shortName() + "\" for spectrum \"" + row.Value.SourceName + "/" + row.Value.SpectrumNativeID + "\"; please override the method manually");
                                else if (row.Value.SpectrumType != phosphoRS.SpectrumType.ECD_ETD) // don't override ETD (e.g. if there is also supplemental CID)
                                {
                                    row.Value.SpectrumType = spectrumTypeByDissociationMethod[method.cvid];
                                    spectrumTypes.Add(method.cvid);
                                }
                            }

                        if (row.Value.SpectrumType == phosphoRS.SpectrumType.None)
                            throw new InvalidDataException("cannot find a dissociation method for spectrum \"" + row.Value.SourceName + "/" + row.Value.SpectrumNativeID + "\"; please set the method manually");
                    }
                    else
                        row.Value.SpectrumType = config.spectrumType;

                    var psm = getPhosphoRS_PSM(config, row.Value);

                    // DEBUG
                    //tbStatus.AppendText(PeptideToString(phosphoPeptide) + "," + AAS.ToOneLetterCodeString() + "," + ptmRepresentation.ToString() + "\n");
                    // Init the mod map of original variant for this PSM.
                    var id2ModMap = new List<System.Tuple<int, List<int>>> { new System.Tuple<int, List<int>>((int) row.Value.PSMId, row.Value.OriginalPhosphoSites.Keys.ToList<int>()) };

                    items.Add(new System.Tuple<phosphoRS.PeptideSpectrumMatch, List<System.Tuple<int, List<int>>>>(psm, id2ModMap));

                    ++rowNumber;
                }

                // report automatically found fragmentation method
                if (config.spectrumType == phosphoRS.SpectrumType.None)
                    setStatus(String.Format("Found {0} fragmentation types: {1}\r\n", spectrumTypes.Count, String.Join(", ", spectrumTypes.Keys.Select(o => CV.cvTermInfo(o).shortName()))));

                // multiply before dividing so the percentage is not truncated to 0 by integer division
                setProgress(currentSource * 100 / totalSources, String.Format("Running PhosphoRS on source {0} of {1} ({2})...", currentSource, totalSources, Path.GetFileName(sourceFilepath)));

                // Initialize the localization.
                currentNr = 0;
                var phosphoRS_Context = new phosphoRS.ThreadManagement(this, cancelAttestation, config.maxIsoformCount, config.maxPTMCount, config.scoreNLToo, config.fragmentMassTolerance, config.scoredAA, items.Count);

                // Start the site localization (takes advantage of multi-threading)
                try
                {
                    phosphoRS_Context.StartPTMLocalisation();

                    // Safety if the attestation module doesn't throw the exception.
                    if (cancelAttestation.IsCancellationRequested)
                    {
                        this.progressBar.ProgressBar.Visible = false;
                        _bgWorkerCancelled = true;
                        setProgress(-1, "Cancelled.");
                        return null;
                    }

                    return phosphoRS_Context.PTMResult;
                }
                catch (OperationCanceledException)
                {
                    this.progressBar.ProgressBar.Visible = false;
                    _bgWorkerCancelled = true;
                    setProgress(-1, "Cancelled.");
                    return null;
                }
            }
            finally
            {
                msd.Dispose();
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Reads the peaks for every row of one spectrum source, builds the PhosphoRS input items and runs
        /// PhosphoRS site localization on them.
        /// </summary>
        /// <param name="sourceFilepath">Path of the spectrum source file to open.</param>
        /// <param name="currentSource">Number of this source; used in status messages and the progress percentage.</param>
        /// <param name="totalSources">Total number of sources; used in status messages and the progress percentage.</param>
        /// <param name="config">PhosphoRS settings; a spectrum type of None means the type is detected per spectrum from the file.</param>
        /// <param name="phosphoRows">Rows to process; the Peaks and SpectrumType of each row value are filled in.</param>
        /// <returns>The PTM localization result, or null when cancellation was requested.</returns>
        /// <exception cref="InvalidDataException">
        /// With automatic spectrum type detection, a spectrum has an unmapped dissociation method or none at all.
        /// </exception>
        private phosphoRS.PTMResultClass RunOnSource(string sourceFilepath, int currentSource, int totalSources, PhosphoRSConfig config, IDictionary <long, PhosphoPeptideAttestationRow> phosphoRows)
        {
            var msd = new pwiz.CLI.msdata.MSDataFile(sourceFilepath);

            // the outer try/finally releases the source file on every exit path: cancellation inside the
            // row loop, an exception while reading spectra, or normal completion
            try
            {
                var spectrumList = msd.run.spectrumList;

                int rowNumber = 0;
                int totalRows = phosphoRows.Count;

                items.Clear();

                var spectrumTypes = new Set <CVID>();

                foreach (var row in phosphoRows)
                {
                    // check for cancellation and report progress on the first row and every 100th row after it
                    if ((rowNumber % 100) == 0)
                    {
                        if (cancelAttestation.IsCancellationRequested)
                        {
                            this.progressBar.ProgressBar.Visible = false;
                            _bgWorkerCancelled = true;
                            setProgress(-1, "Cancelled.");
                            return(null);
                        }

                        if (rowNumber == 0)
                        {
                            setStatus(String.Format("Reading peaks and creating PhosphoRS objects for source {0} of {1} ({2}): {3} spectra\r\n", currentSource, totalSources, Path.GetFileName(sourceFilepath), totalRows));
                        }

                        // multiply before dividing: integer division first would truncate to 0 until the last row
                        setProgress((rowNumber + 1) * 100 / totalRows, String.Format("Reading peaks and creating PhosphoRS objects for source {0} of {1} ({2}): {3}/{4} spectra", currentSource, totalSources, Path.GetFileName(sourceFilepath), rowNumber + 1, totalRows));
                    }

                    var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Value.SpectrumNativeID), true); //may create indexoutofrange error if no spectrum nativeID

                    var OriginalMZs         = pwizSpectrum.getMZArray().data;                                      //getMZArray().data returns IList<double>
                    var OriginalIntensities = pwizSpectrum.getIntensityArray().data;
                    row.Value.Peaks = new phosphoRS.Peak[OriginalMZs.Count];
                    for (int i = 0; i < OriginalMZs.Count; ++i)
                    {
                        row.Value.Peaks[i] = new phosphoRS.Peak(OriginalMZs[i], OriginalIntensities[i]);
                    }

                    if (config.spectrumType == phosphoRS.SpectrumType.None)
                    {
                        // no type configured: derive it from the dissociation methods of the spectrum's precursors
                        row.Value.SpectrumType = phosphoRS.SpectrumType.None;
                        foreach (var precursor in pwizSpectrum.precursors)
                        {
                            foreach (var method in precursor.activation.cvParamChildren(CVID.MS_dissociation_method))
                            {
                                // if dissociation method is set to "Auto" but could not be determined from the file, alert the user
                                if (!spectrumTypeByDissociationMethod.Contains(method.cvid))
                                {
                                    throw new InvalidDataException("cannot handle unmapped dissociation method \"" + CV.cvTermInfo(method.cvid).shortName() + "\" for spectrum \"" + row.Value.SourceName + "/" + row.Value.SpectrumNativeID + "\"; please override the method manually");
                                }
                                else if (row.Value.SpectrumType != phosphoRS.SpectrumType.ECD_ETD) // don't override ETD (e.g. if there is also supplemental CID)
                                {
                                    row.Value.SpectrumType = spectrumTypeByDissociationMethod[method.cvid];
                                    spectrumTypes.Add(method.cvid);
                                }
                            }
                        }

                        if (row.Value.SpectrumType == phosphoRS.SpectrumType.None)
                        {
                            throw new InvalidDataException("cannot find a dissociation method for spectrum \"" + row.Value.SourceName + "/" + row.Value.SpectrumNativeID + "\"; please set the method manually");
                        }
                    }
                    else
                    {
                        row.Value.SpectrumType = config.spectrumType;
                    }

                    var psm = getPhosphoRS_PSM(config, row.Value);

                    // DEBUG
                    //tbStatus.AppendText(PeptideToString(phosphoPeptide) + "," + AAS.ToOneLetterCodeString() + "," + ptmRepresentation.ToString() + "\n");
                    // Init the mod map of original variant for this PSM.
                    var id2ModMap = new List <System.Tuple <int, List <int> > > {
                        new System.Tuple <int, List <int> >((int)row.Value.PSMId, row.Value.OriginalPhosphoSites.Keys.ToList <int>())
                    };

                    items.Add(new System.Tuple <phosphoRS.PeptideSpectrumMatch, List <System.Tuple <int, List <int> > > >(psm, id2ModMap));

                    ++rowNumber;
                }

                // report automatically found fragmentation method
                if (config.spectrumType == phosphoRS.SpectrumType.None)
                {
                    setStatus(String.Format("Found {0} fragmentation types: {1}\r\n", spectrumTypes.Count, String.Join(", ", spectrumTypes.Keys.Select(o => CV.cvTermInfo(o).shortName()))));
                }

                // multiply before dividing so the percentage is not truncated to 0 by integer division
                setProgress(currentSource * 100 / totalSources, String.Format("Running PhosphoRS on source {0} of {1} ({2})...", currentSource, totalSources, Path.GetFileName(sourceFilepath)));

                // Initialize the localization.
                currentNr = 0;
                var phosphoRS_Context = new phosphoRS.ThreadManagement(this, cancelAttestation, config.maxIsoformCount, config.maxPTMCount, config.scoreNLToo, config.fragmentMassTolerance, config.scoredAA, items.Count);

                // Start the site localization (takes advantage of multi-threading)
                try
                {
                    phosphoRS_Context.StartPTMLocalisation();

                    // Safety if the attestation module doesn't throw the exception.
                    if (cancelAttestation.IsCancellationRequested)
                    {
                        this.progressBar.ProgressBar.Visible = false;
                        _bgWorkerCancelled = true;
                        setProgress(-1, "Cancelled.");
                        return(null);
                    }

                    return(phosphoRS_Context.PTMResult);
                }
                catch (OperationCanceledException)
                {
                    this.progressBar.ProgressBar.Visible = false;
                    _bgWorkerCancelled = true;
                    setProgress(-1, "Cancelled.");
                    return(null);
                }
            }
            finally
            {
                msd.Dispose();
            }
        }
        /// <summary>
        /// Samples up to 1000 PSMs selected by the current view filter, matches theoretical fragment
        /// ions against the peaks of each sampled spectrum, and adds one point per spectrum per ion
        /// series to the percent-TIC, percent-peak-count and mean m/z error graphs. After all spectra
        /// are processed, a summary is added to every pane that collected at least 5 values.
        /// </summary>
        /// <remarks>
        /// The method calls <c>Invoke</c> to read the tolerance/filter controls and to add the summaries.
        /// Each opened spectrum source is disposed when the next source is opened and when the method exits.
        /// </remarks>
        /// <exception cref="FileNotFoundException">A spectrum source could not be located on disk.</exception>
        private void getFragmentationStatistics ()
        {
            IList<object[]> queryRows;
            lock (session)
            {
                // pick a random sample of at most 1000 PSM ids from the filtered set
                var randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                                       .List<long>()
                                       .Shuffle()
                                       .Take(1000)
                                       .OrderBy(o => o);
                string randomIdSet = String.Join(",", randomIds.Select(o => o.ToString()).ToArray());
                queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                                "FROM PeptideSpectrumMatch psm " +
                                                "LEFT JOIN psm.Modifications pm " +
                                                "LEFT JOIN pm.Modification mod " +
                                                "WHERE psm.Id IN (" + randomIdSet + ") " +
                                                "GROUP BY psm.Spectrum.id ")
                                   .List<object[]>();
            }

            // ordering by source name lets the loop below open each source file only once
            var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

            var percentTicBySpectrumByFragmentType = new List<PointPairList>();
            var percentPeakCountBySpectrumByFragmentType = new List<PointPairList>();
            var meanMzErrorBySpectrumByFragmentType = new List<PointPairList>();
            var percentTicListByFragmentType = new List<List<double>>();
            var percentPeakCountListByFragmentType = new List<List<double>>();
            var meanMzErrorListByFragmentType = new List<List<double>>();

            foreach (var graphControl in graphControls)
            {
                graphControl.MasterPane.PaneList.ForEach(o => o.CurveList.ForEach(c => c.Clear()));
            }

            // pane i + 1 / curve 3 of each graph receives the per-spectrum points for ion series i
            for (int i = 0; i < (int) IonSeries.Count; ++i)
            {
                percentTicBySpectrumByFragmentType.Add(percentTicGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                percentPeakCountBySpectrumByFragmentType.Add(percentPeakCountGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                meanMzErrorBySpectrumByFragmentType.Add(meanMzErrorGraphControl.MasterPane.PaneList[i + 1].CurveList[3].Points as PointPairList);
                percentTicListByFragmentType.Add(new List<double>());
                percentPeakCountListByFragmentType.Add(new List<double>());
                meanMzErrorListByFragmentType.Add(new List<double>());
            }

            int spectraCount = 0;
            maxPercentTic = 10;
            maxPercentPeakCount = 10;
            maxMeanMzError = 0.1;
            var tolerance = fragmentTolerance;

            string spectrumListFilters = String.Empty;
            Invoke(new MethodInvoker(() =>
            {
                tolerance.value = Convert.ToDouble(fragmentToleranceTextBox.Text);
                tolerance.units = (MZTolerance.Units) fragmentToleranceUnitsComboBox.SelectedIndex;
                meanMzErrorGraphControl.GraphPane.YAxis.Title.Text = "Mean m/z error (" + tolerance.units.ToString() + ")";

                spectrumListFilters = spectrumFiltersTextBox.Text;
                setAutomaticScales();
            }));

            var points = new PointPairList();

            string currentSourceName = null;
            string currentSourcePath = null;
            msdata.MSData msd = null;

            // One generator for the whole run: constructing a new time-seeded Random per spectrum
            // can hand out identical jitter to spectra processed within the same clock tick.
            var rng = new Random();

            // every real ion series (the Count sentinel is excluded); invariant across spectra
            IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast<IonSeries>().Where(o => o != IonSeries.Count).ToArray();

            try
            {
                lock (owner)
                {
                    foreach (var row in spectrumRows)
                    {
                        if (row.SourceName != currentSourceName)
                        {
                            currentSourceName = row.SourceName;
                            currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());
                            if (String.IsNullOrEmpty(currentSourcePath))
                                throw new FileNotFoundException("source file not found");

                            // release the previous source before opening the next one
                            if (msd != null)
                            {
                                msd.Dispose();
                                msd = null;
                            }
                            msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                            //var param = session.Query<AnalysisParameter>().Where(o => o.Name == "SpectrumListFilters").Min(o => o.Value);
                            //string spectrumListFilters = String.IsNullOrEmpty(param) ? String.Empty : param;
                            SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                        }

                        string spectrumId = String.Format("{0}/{1}", row.SourceName, msdata.id.abbreviate(row.Spectrum.NativeID));

                        var spectrumList = msd.run.spectrumList;

                        ++spectraCount;

                        var pwizPeptide = new proteome.Peptide(row.ModifiedSequence, proteome.ModificationParsing.ModificationParsing_Auto,
                                                               proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
                        var fragmentation = pwizPeptide.fragmentation(true, true);

                        var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
                        var pointMap = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));
                        double tic = pointMap.Values.Sum();

                        var percentTicByFragmentType = new List<double>(Enumerable.Repeat(0.0, (int) IonSeries.Count));
                        var percentPeakCountByFragmentType = new List<double>(Enumerable.Repeat(0.0, (int) IonSeries.Count));
                        var matchCountByFragmentType = new List<int>(Enumerable.Repeat(0, (int) IonSeries.Count));
                        var meanMzErrorByFragmentType = new List<double>(Enumerable.Repeat(Double.NaN, (int) IonSeries.Count));

                        // only charge state 1 is evaluated; the loop form is kept from the original code
                        for (int z = 1; z <= 1; ++z)
                        {
                            for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                            {
                                foreach (IonSeries series in ionSeries)
                                {
                                    // c and x series are skipped at full peptide length
                                    if ((series == IonSeries.c || series == IonSeries.x) && length == end)
                                        continue;

                                    double expected = fragmentMass(fragmentation, series, length, z);
                                    seems.PointMap.Enumerator itr = pointMap.FindNear(expected, expected - (expected - tolerance));
                                    if (itr == null || !itr.IsValid)
                                        continue;

                                    int s = (int) series;
                                    percentTicByFragmentType[s] += itr.Current.Value;
                                    ++percentPeakCountByFragmentType[s];
                                    ++matchCountByFragmentType[s];

                                    // NaN marks "no match yet"; switch to 0 before accumulating the first error
                                    if (Double.IsNaN(meanMzErrorByFragmentType[s]))
                                        meanMzErrorByFragmentType[s] = 0;
                                    meanMzErrorByFragmentType[s] += mzError(itr.Current.Key, expected);
                                }
                            }
                        }

                        for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
                        {
                            // convert sum to mean
                            if (matchCountByFragmentType[i] > 0)
                                meanMzErrorByFragmentType[i] /= matchCountByFragmentType[i];

                            // convert to percentages
                            percentTicByFragmentType[i] /= tic / 100;
                            percentPeakCountByFragmentType[i] /= pointMap.Count / 100.0;

                            maxPercentTic = Math.Max(maxPercentTic, percentTicByFragmentType[i]);
                            maxPercentPeakCount = Math.Max(maxPercentPeakCount, percentPeakCountByFragmentType[i]);

                            // horizontal jitter in [-0.5, 0.5) spreads the points of one series apart
                            double jitter = (rng.NextDouble() - 0.5);
                            percentTicBySpectrumByFragmentType[i].Add(jitter, percentTicByFragmentType[i], String.Format("{0}: {1:G4}% ({2} matches)", spectrumId, percentTicByFragmentType[i], matchCountByFragmentType[i]));
                            percentPeakCountBySpectrumByFragmentType[i].Add(jitter, percentPeakCountByFragmentType[i], String.Format("{0}: {1:G4}% ({2} matches)", spectrumId, percentPeakCountByFragmentType[i], matchCountByFragmentType[i]));

                            percentTicListByFragmentType[i].Add(percentTicByFragmentType[i]);
                            percentPeakCountListByFragmentType[i].Add(percentPeakCountByFragmentType[i]);

                            if (!Double.IsNaN(meanMzErrorByFragmentType[i]))
                            {
                                maxMeanMzError = Math.Max(maxMeanMzError, Math.Abs(meanMzErrorByFragmentType[i]));
                                // NOTE(review): the trailing '%' in this label looks copied from the percent graphs — confirm intended units
                                meanMzErrorBySpectrumByFragmentType[i].Add(jitter, meanMzErrorByFragmentType[i], String.Format("{0}: {1:G4}%", spectrumId, meanMzErrorByFragmentType[i]));
                                meanMzErrorListByFragmentType[i].Add(meanMzErrorByFragmentType[i]);
                            }
                        }

                        // NOTE(review): called here without Invoke, unlike the call above — confirm it is safe off the UI thread
                        if ((spectraCount % 100) == 0)
                            setAutomaticScales();
                    } // for each spectrum row
                }
            }
            finally
            {
                // release the last opened source, also when an exception aborts the loop
                if (msd != null)
                    msd.Dispose();
            }

            Invoke(new MethodInvoker(() =>
            {
                for (int i = 0; i < percentTicBySpectrumByFragmentType.Count; ++i)
                {
                    // too few values for a summary of this ion series
                    if (percentTicListByFragmentType[i].Count < 5)
                        continue;
                    percentTicListByFragmentType[i].Sort();
                    percentPeakCountListByFragmentType[i].Sort();
                    addSixNumberSummary(percentTicGraphControl.MasterPane.PaneList[i + 1], percentTicListByFragmentType[i]);
                    addSixNumberSummary(percentPeakCountGraphControl.MasterPane.PaneList[i + 1], percentPeakCountListByFragmentType[i]);

                    // the m/z error list only holds spectra with at least one match, so it can be shorter
                    if (meanMzErrorListByFragmentType[i].Count < 5)
                        continue;
                    meanMzErrorListByFragmentType[i].Sort();
                    addSixNumberSummary(meanMzErrorGraphControl.MasterPane.PaneList[i + 1], meanMzErrorListByFragmentType[i]);
                }
            }));
        }
Esempio n. 6
0
        /// <summary>
        /// run clustering, Rescue PSMs, update idpDB
        /// </summary>
        private void RescuePSMsByClustering()
        {
            DateTime startTime = DateTime.Now;
            reportProgressDelegate reportProgress = new reportProgressDelegate(setProgress);
            reportStatusDelegate reportStatus = new reportStatusDelegate(setStatus);

            string database = session.Connection.GetDataSource();
            logFile = Path.ChangeExtension(database, ".log.txt");

            string config = string.Format("Parameters:\r\n" +
                                          "PrecursorMZTol: {0} \r\n" +
                                          "FragmentMZTol: {1} \r\n" +
                                          "Similarity Threshold >= {2} \r\n" +
                                          "Rank <= {3} \r\n" +
                                          "Cluster Size >= {4} \r\n" +
                                          "Search Scores: {5}{6}{7};{8}{9}{10};{11}{12}{13} \r\n\r\n",
                                          precursorMzTolerance,
                                          fragmentMzTolerance,
                                          similarityThreshold,
                                          maxRank,
                                          minClusterSize,
                                          searchScore1Name, searchScore1Order, searchScore1Threshold,
                                          searchScore2Name, searchScore2Order, searchScore2Threshold,
                                          searchScore3Name, searchScore3Order, searchScore3Threshold);
            reportStatus(config);

            //if (writeLog)
            //    File.WriteAllText(logFile, config);

            /*
             * back up original idpDB
             */
            if (backupDB)
            {
                string dbBackupFile = Path.ChangeExtension(database, ".backup.idpDB");
                reportStatus(string.Format("Backing up idpDB to {0} ... ", dbBackupFile));
                reportProgress(-1, "Backing up idpDB");
                File.Copy(database, dbBackupFile, true);
                reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));
            }

            //reportStatus("Dropping filters... \r\n");
            // basicDataFilter.DropFilters(session);  //// this will drop all filtered tables and rename unfiltered tables
            //basicDataFilter.ApplyBasicFilters(session);

            reportStatus("Querying spectra...");
            reportProgress(-1, "Querying spectra...");
            IList<object[]> queryRows;
            lock (session)
                //// SQL query to retrieve spectrum info for unfiltered psm, filter query results by rank1 search score
                //                queryRows = session.CreateSQLQuery(@"SELECT s.Id, source.Name, NativeID, PrecursorMZ
                //                                                        FROM Spectrum s
                //                                                        JOIN SpectrumSource source ON s.Source = source.Id
                //                                                        JOIN UnfilteredPeptideSpectrumMatch psm ON s.Id = psm.Spectrum AND psm.Rank = 1
                //                                                        JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
                //                                                        JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
                //                                                        WHERE (scoreName.Name = " + "'" + searchScore1Name + "'" + " AND psmScore.Value " + searchScore1Order + searchScore1Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore2Name + "'" + " AND psmScore.Value " + searchScore2Order + searchScore2Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore3Name + "'" + " AND psmScore.Value " + searchScore3Order + searchScore3Threshold.ToString() + ")" +
                //                                                        " GROUP BY s.Id"
                //                                                    ).List<object[]>();

                //// SQL query to retrieve spectrum info for unfiltered psm that map to identified peptide, filter by search score 
                queryRows = session.CreateSQLQuery(@"SELECT s.Id, source.Name, NativeID, PrecursorMZ
                                                        FROM UnfilteredSpectrum s
                                                        JOIN SpectrumSource source ON s.Source = source.Id
                                                        JOIN UnfilteredPeptideSpectrumMatch psm ON s.Id = psm.Spectrum
                                                        JOIN Peptide p ON p.Id = psm.Peptide
                                                        JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
                                                        JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
                                                        WHERE (scoreName.Name = " + "'" + searchScore1Name + "'" + " AND psmScore.Value " + searchScore1Order + searchScore1Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore2Name + "'" + " AND psmScore.Value " + searchScore2Order + searchScore2Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore3Name + "'" + " AND psmScore.Value " + searchScore3Order + searchScore3Threshold.ToString() + ")" +
                                                                       " GROUP BY s.Id"
                                                                   ).List<object[]>();
            var foundSpectraList = session.CreateSQLQuery(@"SELECT distinct spectrum FROM PeptideSpectrumMatch").List<object>();
            var foundSpectra = new HashSet<long>();
            {
                long tempLong;
                foreach (var item in foundSpectraList)
                    if (long.TryParse(item.ToString(), out tempLong))
                        foundSpectra.Add(tempLong);
            }

            var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName).ToList();
            ////converted IOrderedEnumerable to List, the former one may end up with multiple enumeration, each invokes constructor, resulting a fresh set of object

            /*
             * extract peaks for each spectrum, spectrumRows was sorted by SourceName
            */
            string currentSourceName = null;
            string currentSourcePath = null;
            msdata.MSData msd = null;
            int spectrumRowsCount = spectrumRows.Count();
            //Set<long> processedSpectrumIDs = new Set<long>();

            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));
            reportStatus(string.Format("Extracting peaks for {0} spectra ... ", spectrumRowsCount));
            lock (owner)
                for (int i = 0; i < spectrumRowsCount; ++i)
                {
                    if (_bgWorkerClustering.CancellationPending)
                    {
                        _bgWorkerCancelled = true;
                        return;
                    }

                    var row = spectrumRows.ElementAt(i);

                    reportProgress((int)(((double)(i + 1) / (double)spectrumRowsCount) * 100), string.Format("Extracting peaks ({0}/{1}) from {2}", i + 1, spectrumRowsCount, row.SourceName));

                    //if (processedSpectrumIDs.Contains(row.SpectrumId))
                    //    break;
                    if (row.SourceName != currentSourceName)
                    {
                        currentSourceName = row.SourceName;
                        currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());
                        if (msd != null)
                            msd.Dispose();
                        msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                        SpectrumListFactory.wrap(msd, "threshold count 100 most-intense"); //only keep the top 100 peaks
                        //SpectrumListFactory.wrap(msd, "threshold bpi-relative .5 most-intense"); //keep all peaks that are at least 50% of the intensity of the base peak
                        //SpectrumListFactory.wrap(msd, "threshold tic-cutoff .95 most-intense"); //keep all peaks that count for 95% TIC
                        //threshold <count|count-after-ties|absolute|bpi-relative|tic-relative|tic-cutoff> <threshold> <most-intense|least-intense> [int_set(MS levels)]
                    }

                    var spectrumList = msd.run.spectrumList;
                    var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.SpectrumNativeID), true); //may create indexoutofrange error if no spectrum nativeID                   
                    row.OriginalMZs = pwizSpectrum.getMZArray().data; //getMZArray().data returns IList<double>
                    row.OriginalIntensities = pwizSpectrum.getIntensityArray().data;
                    //processedSpectrumIDs.Add(row.SpectrumId);

                }

            /* 
             * re-sort spectrumRows by precursorMZ
             * walk through each spectrum. compare similarity to all other spectra within the precursorMZTolerance 
             * (e.g. compare 1 to 2,3,4, then 2 to 3,4,5, then 3 to 4,5 etc), 
             * if above similarityThreshold, add link edge to BOTH spectra
             * merge all connected spectra to a cluster             
            */
            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));
            reportStatus("Computing similarities... ");
            var spectrumRowsOrderByPrecursorMZ = (from randomVar in spectrumRows orderby randomVar.PrecursorMZ select randomVar).ToList();
            LinkMap linkMap = new LinkMap(); //// spectrum Id as key, directly linked spectra as value
            double similarityScore = 0;
            lock (owner)
                for (int i = 0; i < spectrumRowsCount; ++i)
                {
                    if (_bgWorkerClustering.CancellationPending)
                    {
                        _bgWorkerCancelled = true;
                        return;
                    }

                    var row = spectrumRowsOrderByPrecursorMZ.ElementAt(i);

                    reportProgress((int)(((double)(i + 1) / (double)spectrumRowsCount) * 100), "Computing similarities");
                    for (int j = i + 1; j < spectrumRowsCount; ++j)
                    {
                        var nextRow = spectrumRowsOrderByPrecursorMZ.ElementAt(j);

                        if (Math.Abs(row.PrecursorMZ - nextRow.PrecursorMZ) > precursorMzTolerance)
                        {
                            break;
                        }
                        else
                        {
                            ////compare pairwise similarity, link spectra passing threshold to both spectrum
                            Peaks rowPeakList = new Peaks(row.OriginalMZs, row.OriginalIntensities);
                            Peaks nextRowPeakList = new Peaks(nextRow.OriginalMZs, nextRow.OriginalIntensities);
                            //// converting peak intensities to sqrt here is 5-fold slower than doing this in DotProductCompareTo function
                            //Peaks rowPeakList = new Peaks(row.OriginalMZs, row.OriginalIntensities.Select(o => Math.Sqrt(o)).ToList());
                            //Peaks nextRowPeakList = new Peaks(nextRow.OriginalMZs, nextRow.OriginalIntensities.Select(o => Math.Sqrt(o)).ToList());
                            similarityScore = ClusteringAnalysis.DotProductCompareTo(rowPeakList, nextRowPeakList, fragmentMzTolerance);
                            //reportStatus("similarity between " + row.SpectrumNativeID + " and " + nextRow.SpectrumNativeID + " is " + similarityScore.ToString() + "\r\n");
                            if (similarityScore >= similarityThreshold)
                            {
                                linkMap[(long)row.SpectrumId].Add((long)nextRow.SpectrumId);
                                linkMap[(long)nextRow.SpectrumId].Add((long)row.SpectrumId); //// if a -> b, then b -> a  
                            }
                        }
                    }
                }
            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));

            reportStatus("Clustering spectra... ");
            reportProgress(-1, "Clustering spectra");
            linkMap.GetMergedLinkList();
            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));

            //// print clustered spectra
            //foreach (var cluster in linkMap.MergedLinkList)
            //{
            //    reportStatus("Number of spectra in cluster: " + cluster.Count().ToString() + "\r\n");
            //    foreach (var sID in cluster)
            //    {
            //        var nativeID = (from o in spectrumRows where o.SpectrumId == sID select o.SpectrumNativeID).First();
            //        reportStatus(nativeID.ToString() + "\t");
            //    }
            //    reportStatus("\r\n");
            //}

            ////free some memory
            queryRows.Clear();
            queryRows = null;
            msd.Dispose();
            msd = null;
            spectrumRows.Clear();
            spectrumRows = null;
            spectrumRowsOrderByPrecursorMZ.Clear();
            spectrumRowsOrderByPrecursorMZ = null;

            /* 
             * Go through each cluster, rescue PSMs if spectra in the same cluster were identified as the same peptide (id)
             */
            List<Set<long>> clusterSetList = (from o in linkMap.MergedLinkList where o.Count >= minClusterSize select o).ToList();    //// each element in the list is a set of clustered spectrum Ids, select sets with at least minClusterSize element           
            int clusterSetListCount = clusterSetList.Count();
            var allSpectrumIDs = (from o in clusterSetList from j in o select j).ToList();
            reportStatus(string.Format("Number of clusters: {0} \r\n", clusterSetListCount));
            reportStatus(string.Format("Number of spectra clustered: {0}/{1} ({2:0.0%}) \r\n", allSpectrumIDs.Count, spectrumRowsCount, (double)allSpectrumIDs.Count / spectrumRowsCount));

            IList<object> identPSMQueryRows;
            lock (session)
                identPSMQueryRows = session.CreateSQLQuery(@"SELECT psm.Id FROM PeptideSpectrumMatch psm").List<object>();

            var identPSMIdSet = new Set<long>(identPSMQueryRows.Select(o => (long)o));
            reportStatus(string.Format("Number of PSMs identified: {0} \r\n", identPSMIdSet.Count));

            //// create a temp table to store clustered spectrum IDs
            session.CreateSQLQuery(@"DROP TABLE IF EXISTS TempSpecIds;
                                     CREATE TEMP TABLE TempSpecIds (Id INTEGER PRIMARY KEY)
                                    ").ExecuteUpdate();

            var insertTempSpecIdscmd = session.Connection.CreateCommand();
            insertTempSpecIdscmd.CommandText = "INSERT INTO TempSpecIds VALUES (?)";
            var insertTempSpecIdsParameters = new List<System.Data.IDbDataParameter>();
            for (int i = 0; i < 1; ++i)
            {
                insertTempSpecIdsParameters.Add(insertTempSpecIdscmd.CreateParameter());
                insertTempSpecIdscmd.Parameters.Add(insertTempSpecIdsParameters[i]);
            }
            insertTempSpecIdscmd.Prepare();
            foreach (var id in allSpectrumIDs)
            {
                insertTempSpecIdsParameters[0].Value = id;
                insertTempSpecIdscmd.ExecuteNonQuery();
            }


            IList<object> allPsmIdQueryRows;
            lock (session)
                //// SQL query to retrieve all psm id for clustered spectra with score above a threshold
                allPsmIdQueryRows = session.CreateSQLQuery(@"SELECT GROUP_CONCAT(psm.Id)
                                                        FROM TempSpecIds
                                                        JOIN UnfilteredPeptideSpectrumMatch psm ON TempSpecIds.Id = psm.Spectrum
                                                        JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
                                                        JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
                                                        WHERE psm.Rank <= " + maxRank.ToString() + " AND ((scoreName.Name = " + "'" + searchScore1Name + "'" + " AND psmScore.Value " + searchScore1Order + searchScore1Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore2Name + "'" + " AND psmScore.Value " + searchScore2Order + searchScore2Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore3Name + "'" + " AND psmScore.Value " + searchScore3Order + searchScore3Threshold.ToString() + "))" +
                                                        " GROUP BY TempSpecIds.Id, psm.Charge"
                                                    ).List<object>();

            var allPsmIdsRows = allPsmIdQueryRows.Select(o => new PsmIdRow(o)).ToList();

            Set<long> allPsmIds = new Set<long>();
            foreach (var row in allPsmIdsRows)
            {
                allPsmIds.Union(row.PsmIds);
            }

            session.CreateSQLQuery(@"DROP TABLE IF EXISTS TempSpecIds").ExecuteUpdate();

            reportStatus("Querying PSMs...");
            reportProgress(-1, "Querying PSMs");
            IList<object[]> allClusterQueryRows;

            //// create a temp table to store psm IDs
            session.CreateSQLQuery(@"DROP TABLE IF EXISTS TempPsmIds;
                                     CREATE TEMP TABLE TempPsmIds (Id INTEGER PRIMARY KEY)
                                    ").ExecuteUpdate();

            var cmd = session.Connection.CreateCommand();
            cmd.CommandText = "INSERT INTO TempPsmIds VALUES (?)";
            var parameters = new List<System.Data.IDbDataParameter>();
            for (int i = 0; i < 1; ++i)
            {
                parameters.Add(cmd.CreateParameter());
                cmd.Parameters.Add(parameters[i]);
            }
            cmd.Prepare();
            foreach (var id in allPsmIds)
            {
                parameters[0].Value = id;
                cmd.ExecuteNonQuery();
            }

            //// qurey string for revison 286, no DecoySequence in Peptide table
            //            string queryCmd = @"SELECT psm.Id as psmId, s.Id, source.Name, s.NativeID, psm.Rank, psm.Charge, psmScore.Value, IFNULL(GROUP_CONCAT(DISTINCT pm.Offset || ':' || mod.MonoMassDelta),''),
            //                                    (SELECT SUBSTR(pro.Sequence, pi.Offset+1, pi.Length)
            //                                                                FROM PeptideInstance pi
            //                                                                JOIN ProteinData pro ON pi.Protein=pro.Id
            //                                                                WHERE pi.Protein=pro.Id AND
            //                                                                  pi.Id=(SELECT MIN(pi2.Id)
            //                                                                         FROM PeptideInstance pi2
            //                                                                         WHERE psm.Peptide=pi2.Peptide))
            //                                    FROM TempIDs tempIDs
            //                                    JOIN Spectrum s ON s.Id = tempIDs.Id
            //                                    JOIN SpectrumSource source ON s.Source = source.Id
            //                                    JOIN PeptideSpectrumMatch psm ON s.Id = psm.Spectrum
            //                                    LEFT JOIN PeptideModification pm ON psm.Id = pm.PeptideSpectrumMatch
            //                                    LEFT JOIN Modification mod ON pm.Modification = mod.Id
            //                                    JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
            //                                    JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
            //                                    WHERE scoreName.Name = " + "'" + searchScoreName + "'" + " AND psm.Rank <= 5" +
            //                                " GROUP BY psm.Id";
            //AND s.Id IN ( " + String.Join(",", allSpectrumIDs.Select(o => o.ToString()).ToArray()) + " ) " +

            //// query string for revison 288, added DecoySequence in Peptide table
            //            string queryCmd = @"SELECT psm.Id as psmId, s.Id, source.Name, s.NativeID, psm.Rank, psm.Charge, psmScore.Value, IFNULL(GROUP_CONCAT(DISTINCT pm.Offset || ':' || mod.MonoMassDelta),''),
            //                                    (SELECT IFNULL(SUBSTR(pro.Sequence, pi.Offset+1, pi.Length), (SELECT DecoySequence FROM Peptide p WHERE p.Id = pi.Peptide))
            //                                            FROM PeptideInstance pi
            //                                            LEFT JOIN ProteinData pro ON pi.Protein=pro.Id
            //                                            WHERE pi.Id=(SELECT pi2.Id FROM PeptideInstance pi2 WHERE pi2.Peptide=psm.Peptide LIMIT 1))
            //                                    FROM TempIDs tempIDs
            //                                    JOIN Spectrum s ON s.Id = tempIDs.Id
            //                                    JOIN SpectrumSource source ON s.Source = source.Id
            //                                    JOIN PeptideSpectrumMatch psm ON s.Id = psm.Spectrum
            //                                    LEFT JOIN PeptideModification pm ON psm.Id = pm.PeptideSpectrumMatch
            //                                    LEFT JOIN Modification mod ON pm.Modification = mod.Id
            //                                    JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
            //                                    JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
            //                                    WHERE scoreName.Name = " + "'" + searchScoreName + "'" + " AND psm.Rank <= 5" +
            //                                " GROUP BY psm.Id";

            ////query string for revision 291, retrieve by PSM Ids
            //            string queryCmd = @"SELECT psm.Id as psmId, psm.Peptide,s.Id, source.Name, s.NativeID, psm.Charge, IFNULL(GROUP_CONCAT(DISTINCT pm.Offset || ':' || mod.MonoMassDelta),''),
            //                                    (SELECT IFNULL(SUBSTR(pd.Sequence, pi.Offset+1, pi.Length), (SELECT DecoySequence FROM UnfilteredPeptide p WHERE p.Id = pi.Peptide))),
            //                                    GROUP_CONCAT(pro.Accession),psm.QValue, psm.Rank, psmScore.Value, analysis.Id
            //                                    FROM TempPsmIds tempPsmIds
            //                                    JOIN UnfilteredPeptideSpectrumMatch psm ON psm.Id = tempPsmIds.Id 
            //                                    JOIN Analysis analysis ON psm.Analysis = analysis.Id
            //                                    JOIN Spectrum s ON s.Id = psm.Spectrum
            //                                    JOIN SpectrumSource source ON s.Source = source.Id
            //                                    JOIN UnfilteredPeptideInstance pi ON psm.Peptide = pi.Peptide
            //                                    JOIN UnfilteredProtein pro ON pi.Protein = pro.Id
            //                                    LEFT JOIN ProteinData pd ON pi.Protein=pd.Id
            //                                    LEFT JOIN PeptideModification pm ON psm.Id = pm.PeptideSpectrumMatch
            //                                    LEFT JOIN Modification mod ON pm.Modification = mod.Id
            //                                    LEFT JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
            //                                    LEFT JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
            //                                    WHERE scoreName.Name = " + "'" + searchScore1Name + "'" +
            //                                    " GROUP BY psm.Id";

            // query for r291, fix no seq for some peptides shared by target and decoy proteins, query seq for target and decoy proteins separately then union
            string queryCmd = @"SELECT psm.Id as psmId, psm.Peptide,s.Id, source.Name, s.NativeID, psm.Charge, 
                                        IFNULL(GROUP_CONCAT(DISTINCT pm.Offset || ':' || mod.MonoMassDelta),''),
                                        IFNULL(IFNULL(SUBSTR(pd.Sequence, pi.Offset+1, pi.Length),(SELECT DecoySequence FROM UnfilteredPeptide p WHERE p.Id = pi.Peptide)),
                                                (SELECT SUBSTR(pd.Sequence, pi.Offset+1, pi.Length)
                                                FROM UnfilteredPeptideInstance pi 
                                                JOIN UnfilteredProtein pro ON pi.Protein = pro.Id AND pro.IsDecoy = 0
                                                LEFT JOIN ProteinData pd ON pi.Protein=pd.Id
                                                WHERE psm.Peptide = pi.Peptide
                                                UNION
                                                SELECT p.DecoySequence
                                                FROM UnfilteredPeptide p
                                                JOIN UnfilteredPeptideInstance pi ON p.Id = pi.Peptide
                                                JOIN UnfilteredProtein pro ON pi.Protein = pro.Id AND pro.IsDecoy = 1
                                                WHERE psm.Peptide = pi.Peptide AND p.DecoySequence is not null)),
                                        GROUP_CONCAT(pro.Accession),
                                        psm.QValue, psm.Rank, psmScore.Value, psm.Analysis
                                        FROM TempPsmIds tempPsmIds
                                        JOIN UnfilteredPeptideSpectrumMatch psm ON psm.Id = tempPsmIds.Id 
                                        JOIN UnfilteredSpectrum s ON s.Id = psm.Spectrum
                                        JOIN SpectrumSource source ON s.Source = source.Id
                                        JOIN UnfilteredPeptideInstance pi ON psm.Peptide = pi.Peptide
                                        JOIN UnfilteredProtein pro ON pi.Protein = pro.Id
                                        LEFT JOIN ProteinData pd ON pi.Protein=pd.Id
                                        LEFT JOIN PeptideModification pm ON psm.Id = pm.PeptideSpectrumMatch
                                        LEFT JOIN Modification mod ON pm.Modification = mod.Id
                                        LEFT JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
                                        LEFT JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
                                        WHERE scoreName.Name in ( " + "'" + searchScore1Name + "','" + searchScore2Name + "','" + searchScore3Name + "')" +
                                        " GROUP BY psm.Id";

            lock (session)
                allClusterQueryRows = session.CreateSQLQuery(queryCmd).List<object[]>();
            var allClusterSpectrumRows = allClusterQueryRows.Select(o => new ClusterSpectrumRow(o)).ToList();

            session.CreateSQLQuery(@"DROP TABLE IF EXISTS TempPsmIds").ExecuteUpdate();
            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));
            reportStatus(string.Format("Number of PSMs retrieved: {0} \r\n", allClusterSpectrumRows.Count));

            reportStatus("Rescuing PSMs... ");
            if (writeLog)
            {
                string logHeader = string.Join("\t", new string[] { "SourceName", "NativeID", "Charge", "RescuedSequence", "Protein", "ScoreName", "SearchScore", "BAScore", "QValue", "Rank", "Rank1Sequence", "Rank1Protein", "Rank1SearchScore", "Rank1BAScore", "Rank1Qvalue", "\r\n" });
                File.WriteAllText(logFile, logHeader);
            }

            Dictionary<long, UpdateValues> updateDict = new Dictionary<long, UpdateValues>();  ////key: Id in unfiltered psm table, value: reassigned Qvalue and reassinged Rank
            Set<long> rescuedDistinctSpectraIds = new Set<long>();

            //// SQL query to retrieve analysis Id and search score order from the QonverterSettings table
            IList<object[]> qonvertSettingsQueryRows;
            lock (session)
                qonvertSettingsQueryRows = session.CreateSQLQuery("SELECT Id, ScoreInfoByName FROM QonverterSettings").List<object[]>();
            var qonvertSettingRows = qonvertSettingsQueryRows.Select(o => new qonvertSettingRows(o)).ToList();
            Dictionary<long, string> analysisScoreOrder = new Dictionary<long, string>();
            Dictionary<long, string> analysisScoreName = new Dictionary<long, string>();
            foreach (var qonvertSettingRow in qonvertSettingRows)
            {
                analysisScoreOrder.Add(qonvertSettingRow.Id, qonvertSettingRow.ScoreOrder);
                analysisScoreName.Add(qonvertSettingRow.Id, qonvertSettingRow.ScoreName);
            }

            ////walk through each cluster to rescue PSMs
            for (int i = 0; i < clusterSetListCount; ++i)
            {
                var clusterSet = clusterSetList.ElementAt(i);

                if (_bgWorkerClustering.CancellationPending)
                {
                    _bgWorkerCancelled = true;
                    return;
                }

                //reportStatus("Clustering set: " + String.Join(",",clusterSet.Select(j => j.ToString()).ToArray()) + "\r\n");
                reportProgress((int)(((double)(i + 1) / (double)clusterSetListCount) * 100), "Rescuing PSMs");
                var clusterSpectrumRows = (from o in allClusterSpectrumRows where clusterSet.Contains(o.SpectrumId) select o).ToList();
                //Map<long, Set<long>> peptideIdDict = new Map<long, Set<long>>(); //key: peptide id, value: psm ids
                //Set<long> unprocessedPSMIds = new Set<long>();
                Set<string> unprocessedSpecChargeAnalysisSet = new Set<string>();  //spectrumId.charge.analysis

                var pepSeqDict = new PepDictionary();  //key: modified peptide sequence, value: spectrumId.charge.analysis, score
                //var peptideIdDict = new PepDictionary(); //key: peptide ID, value: PSM Ids and scores

                foreach (var row in clusterSpectrumRows)
                {
                    //peptideIdDict.Add(row.PeptideId,row.PSMId, row.SearchScore);
                    //peptideIdDict[row.PeptideId].Add(row.PSMId);
                    pepSeqDict.Add(row.ModifiedSequence, row.SpectrumId, row.Charge, row.Analysis, row.SearchScore, row.PSMId);
                    //unprocessedPSMIds.Add(row.PSMId);
                    //unprocessedSpectrumCharge.Add(row.SpectrumId.ToString() + "." + row.Charge.ToString());
                    unprocessedSpecChargeAnalysisSet.Add(row.SpectrumId.ToString() + "." + row.Charge.ToString() + "." + row.Analysis.ToString());
                }


                pepSeqDict.ComputeBayesianAverage(analysisScoreOrder); //replace score from sum of search scores to Bayesian Average

                var sortedPepSeqDictKeys = from k in pepSeqDict.Keys orderby pepSeqDict[k].FinalScore descending, pepSeqDict[k].PsmIdSpecDict.Count() descending select k; // sort by score, if tied, second sort by # of linked psms

                foreach (var pepSeq in sortedPepSeqDictKeys)
                {
                    if (unprocessedSpecChargeAnalysisSet.Count == 0)
                        break;

                    if (pepSeqDict[pepSeq].PsmIdSpecDict.Keys.Any(pId => identPSMIdSet.Contains(pId))) ////at least one psm identified as this peptide in this cluster
                    {
                        foreach (var psmId in pepSeqDict[pepSeq].PsmIdSpecDict.Keys)
                        {
                            var row = (from o in clusterSpectrumRows where o.PSMId == psmId select o).First();
                            string spec = row.SpectrumId.ToString() + "." + row.Charge.ToString() + "." + row.Analysis.ToString();
                            if (unprocessedSpecChargeAnalysisSet.Contains(spec))
                            {
                                if (identPSMIdSet.Contains(psmId) || foundSpectra.Contains(row.SpectrumId))
                                {
                                    //// not process ident PSMs
                                    unprocessedSpecChargeAnalysisSet.Remove(spec);
                                }
                                else
                                {
                                    updateDict.Add(psmId, new UpdateValues(-1, 1)); //// update Qvalue = -1, Rank =1
                                    ++rescuedPSMsCount;
                                    rescuedDistinctSpectraIds.Add(row.SpectrumId);
                                    unprocessedSpecChargeAnalysisSet.Remove(spec);

                                    if (writeLog)
                                    {
                                        string originalRank1Seq = "";
                                        string originalRank1Protein = "";
                                        string originalRank1Score = "";
                                        string originalRank1BAScore = "";
                                        string originalRank1Qvalue = "";

                                        if (row.Rank != 1)
                                        {
                                            var originalRank1Rows = (from o in clusterSpectrumRows where o.SpectrumId == row.SpectrumId && o.Rank == 1 && o.Charge == row.Charge && o.Analysis == row.Analysis select new { o.ModifiedSequence, o.Protein, o.SearchScore, o.QValue }).ToList(); ////may exist more than one rank1 hits
                                            foreach (var originalRank1Row in originalRank1Rows)
                                            {
                                                originalRank1Seq += originalRank1Row.ModifiedSequence + ";";
                                                originalRank1Protein += originalRank1Row.Protein + ";";
                                                originalRank1Score += originalRank1Row.SearchScore.ToString("0.0000") + ";";
                                                originalRank1BAScore += pepSeqDict.ContainsKey(originalRank1Row.ModifiedSequence) ? pepSeqDict[originalRank1Row.ModifiedSequence].FinalScore.ToString("0.0000") + ";" : "";
                                                originalRank1Qvalue += originalRank1Row.QValue.ToString("0.0000") + ";";
                                            }
                                        }
                                        string logLine = string.Join("\t", new string[] { row.SourceName, row.SpectrumNativeID, row.Charge.ToString(), row.ModifiedSequence, row.Protein, analysisScoreName[row.Analysis], row.SearchScore.ToString("0.0000"), pepSeqDict[pepSeq].FinalScore.ToString("0.0000"), row.QValue.ToString("0.0000"), row.Rank.ToString(), originalRank1Seq, originalRank1Protein, originalRank1Score, originalRank1BAScore, originalRank1Qvalue });
                                        using (StreamWriter sw = File.AppendText(logFile))
                                        {
                                            sw.WriteLine(logLine);
                                        }
                                    }
                                }
                            }
                        }
                    }
                } //// end of foreach (var pepSeq in sortedPepSeqDictKeys)

            } //// end of for (int i = 0; i < clusterSetListCount; ++i)
            reportStatus(string.Format("{0} seconds elapsed\r\n", (DateTime.Now - startTime).TotalSeconds));

            /*
             *update unfiltered psm table in idpDB
            */
            if (rescuedPSMsCount == 0)
                return;

            reportStatus("Updating idpDB... ");

            session.Transaction.Begin();
            //basicDataFilter.DropFilters(session);  // tables were dropped before querying
            var updateCmd = session.Connection.CreateCommand();
            updateCmd.CommandText = "UPDATE UnfilteredPeptideSpectrumMatch SET QValue = ?, Rank = ? WHERE Id = ?";
            var updateParameters = new List<System.Data.IDbDataParameter>();
            for (int i = 0; i < 3; ++i)
            {
                updateParameters.Add(updateCmd.CreateParameter());
                updateCmd.Parameters.Add(updateParameters[i]);
            }
            updateCmd.Prepare();
            int updateCount = 0;
            int allUpdateCount = updateDict.Count;
            foreach (KeyValuePair<long, UpdateValues> pair in updateDict)
            {
                updateParameters[0].Value = pair.Value.ReassignedQvalue;   //// Qvalue
                updateParameters[1].Value = pair.Value.ReassignedRank;   //// Rank
                updateParameters[2].Value = pair.Key;    //// psm id
                updateCmd.ExecuteNonQuery();
                reportProgress((int)(((double)(updateCount + 1) / (double)allUpdateCount) * 100), "Updating idpDB");
                ++updateCount;
            }
            session.Transaction.Commit();

            //basicDataFilter.ApplyBasicFilters(session);
            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));
            reportStatus(string.Format("Rescued {0} PSMs for {1} distinct spectra\r\n", rescuedPSMsCount, rescuedDistinctSpectraIds.Count));
            reportProgress(0, "Ready");
            /*
             * not recompute q values, reload idpDB, implemented in _bgWorkerClustering_RunWorkerCompleted
            */

        } //// end of RescuePSMsByClustering
Esempio n. 7
0
        /// <summary>
        /// Fills <c>precursorScatterPlot</c> and <c>chargeReducedScatterPlot</c> with the peaks of a
        /// sample of identified spectra, plotted relative to the precursor m/z and to the
        /// precursor m/z multiplied by the PSM charge.
        /// </summary>
        /// <remarks>
        /// At most 1000 shuffled PSM ids are taken from the current view filter. For each resulting
        /// spectrum the source file is opened, the spectrum list filters from the text box are applied,
        /// and every peak is added to both plots with its intensity divided by the spectrum's summed
        /// intensity. Peaks found near a predicted fragment mass are additionally added as points
        /// tagged with the ion series. UI controls are only touched through <c>Invoke</c>, and the
        /// graph is refreshed every 100 spectra.
        /// </remarks>
        /// <returns>
        /// Always a new, empty list; the results are delivered through the scatter plots.
        /// </returns>
        private List<double> getPeakStatistics ()
        {
            IList<object[]> queryRows;
            lock (session)
            {
                // Materialize the sampled ids so the empty case can be detected before building the IN (...) list.
                var randomIds = session.CreateQuery("SELECT psm.Id " + viewFilter.GetFilteredQueryString(DataFilter.FromPeptideSpectrumMatch))
                                       .List<long>()
                                       .Shuffle()
                                       .Take(1000)
                                       .OrderBy(o => o)
                                       .ToArray();

                if (randomIds.Length == 0)
                {
                    // Nothing passed the filter: skip the second query instead of issuing "IN ()".
                    queryRows = new List<object[]>();
                }
                else
                {
                    string randomIdSet = String.Join(",", randomIds.Select(o => o.ToString()).ToArray());
                    queryRows = session.CreateQuery("SELECT psm.Spectrum.Source.Name, psm.Spectrum, psm, DISTINCT_GROUP_CONCAT(pm.Offset || ':' || mod.MonoMassDelta), psm.Peptide.Sequence " +
                                                    "FROM PeptideSpectrumMatch psm " +
                                                    "LEFT JOIN psm.Modifications pm " +
                                                    "LEFT JOIN pm.Modification mod " +
                                                    "WHERE psm.Id IN (" + randomIdSet + ") " +
                                                    "GROUP BY psm.Spectrum.id ")
                                       .List<object[]>();
                }
            }

            // Ordering by source name lets each source file be opened once while its rows are processed.
            var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName);

            precursorScatterPlot.Clear();
            chargeReducedScatterPlot.Clear();

            int spectraCount = 0;

            // The filter text lives in a UI control, so it is read inside Invoke.
            string spectrumListFilters = String.Empty;
            Invoke(new MethodInvoker(() =>
            {
                spectrumListFilters = spectrumFiltersTextBox.Text;
                zedGraphControl.MasterPane.AxisChange();
                zedGraphControl.Refresh();
            }));

            // Settings that do not depend on the spectrum being processed.
            bool plotMatchedPeaks = true;
            bool removeMatchedPeaks = false;
            double tolerance = 0.03;
            IonSeries[] ionSeries = Enum.GetValues(typeof(IonSeries)).Cast<IonSeries>().Where(o => o != IonSeries.Count).ToArray();

            string currentSourceName = null;
            msdata.MSData msd = null;

            lock (owner)
            {
                try
                {
                    foreach (var row in spectrumRows)
                    {
                        if (row.SourceName != currentSourceName)
                        {
                            currentSourceName = row.SourceName;
                            string currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());

                            // Release the previous source file before opening the next one.
                            if (msd != null)
                            {
                                msd.Dispose();
                                msd = null;
                            }
                            msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                            //var param = session.Query<AnalysisParameter>().Where(o => o.Name == "SpectrumListFilters").Min(o => o.Value);
                            //string spectrumListFilters = String.IsNullOrEmpty(param) ? String.Empty : param;
                            SpectrumListFactory.wrap(msd, spectrumListFilters.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries));
                        }

                        string label = String.Format("{0}/{1}\n{2}", row.SourceName, msdata.id.abbreviate(row.Spectrum.NativeID), row.ModifiedSequence);

                        var spectrumList = msd.run.spectrumList;

                        ++spectraCount;

                        var pwizPeptide = new proteome.Peptide(row.ModifiedSequence, proteome.ModificationParsing.ModificationParsing_Auto,
                                                               proteome.ModificationDelimiter.ModificationDelimiter_Brackets);
                        var fragmentation = pwizPeptide.fragmentation(true, true);

                        var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.Spectrum.NativeID), true);
                        var pointMap = new seems.PointMap(new ZedGraph.PointPairList(pwizSpectrum.getMZArray().data, pwizSpectrum.getIntensityArray().data));

                        // Summed intensity of the spectrum; every plotted intensity is divided by it.
                        double tic = pointMap.Values.Sum();

                        double precursorMz = row.Spectrum.PrecursorMZ;
                        double chargeReducedPrecursorMz = precursorMz * row.PeptideSpectrumMatch.Charge;

                        // Only fragment charge 1 is evaluated; the loop form is kept from the original code.
                        for (int z = 1; z <= 1; ++z)
                        {
                            for (int length = 1, end = pwizPeptide.sequence.Length; length <= end; ++length)
                            {
                                string NTermFragment = row.ModifiedSequence.Substring(0, length);

                                foreach (IonSeries series in ionSeries)
                                {
                                    // c, c-1 and x ions are skipped at full peptide length.
                                    if ((series == IonSeries.c || series == IonSeries.cMinus1 || series == IonSeries.x) &&
                                        length == pwizPeptide.sequence.Length)
                                        continue;

                                    seems.PointMap.Enumerator itr = pointMap.FindNear(fragmentMass(fragmentation, series, length, z), tolerance);
                                    if (itr != null && itr.IsValid)
                                    {
                                        if (plotMatchedPeaks)
                                        {
                                            precursorScatterPlot.AddPoint(new PointPair(itr.Current.Key - precursorMz, itr.Current.Value / tic, (int)series, String.Format("{0} {1}\n{2} {3} {4} {5}", label, precursorMz, NTermFragment, itr.Current.Key, IonSeriesLabels[(int)series], length)));
                                            chargeReducedScatterPlot.AddPoint(new PointPair(itr.Current.Key - chargeReducedPrecursorMz, itr.Current.Value / tic, (int)series, String.Format("{0} {1}\n{2} {3} {4} {5}", label, chargeReducedPrecursorMz, NTermFragment, itr.Current.Key, IonSeriesLabels[(int)series], length)));
                                        }

                                        if (removeMatchedPeaks)
                                            pointMap.Remove(itr);
                                    }
                                }
                            }
                        }

                        // Every peak still in the map is also plotted, with z-value 0.
                        foreach (var pair in pointMap)
                        {
                            precursorScatterPlot.AddPoint(new PointPair(pair.Key - precursorMz, pair.Value/tic, 0, label));
                            chargeReducedScatterPlot.AddPoint(new PointPair(pair.Key - chargeReducedPrecursorMz, pair.Value/tic, 0, label));
                        }

                        // Periodic refresh so the plot fills in while spectra are still being processed.
                        if ((spectraCount % 100) == 0)
                        {
                            Invoke(new MethodInvoker(() =>
                            {
                                zedGraphControl.MasterPane.AxisChange();
                                zedGraphControl.Refresh();
                            }));
                        }
                    }
                }
                finally
                {
                    // Release the last opened source file, including when an exception aborts the loop.
                    if (msd != null)
                        msd.Dispose();
                }
            }

            Invoke(new MethodInvoker(() =>
            {
                if (!lockZoomCheckBox.Checked)
                {
                    zedGraphControl.ZoomOutAll(zedGraphControl.GraphPane);
                }
                zedGraphControl.MasterPane.AxisChange();
                zedGraphControl.Refresh();
            }));
            return new List<double>(); //percentTicBySpectrumByFragmentType[1];
        }