Ejemplo n.º 1
0
        public void CreateFilter()
        {
            var filterLogs = new List <int?>();
            var filters    = new ISequenceFilter <DummySource, DummyContext>[] {
                new SpySequenceFilter(() => filterLogs.Add(0)),
                new SpySequenceFilter(() => filterLogs.Add(1)),
                new SpySequenceFilter(() => filterLogs.Add(2)),
            };

            new TestCaseRunner()
            .Run(() => Pipeline.CreateFilter(filters))
            .Verify((actual, _) => {
                var source       = new DummySource[] { new DummySource(), new DummySource() };
                var context      = new DummyContext();
                var actualSource = actual(source, context).GetAwaiter().GetResult();

                CollectionAssert.AreEqual(new int[] { 0, 1, 2 }, filterLogs, "フィルターの適用順序が一致しません。");
                Assert.AreEqual(source, actualSource, "入力シーケンスが出力に素通しされていません。");
            }, (Type)null);
        }
Ejemplo n.º 2
0
        private SortedSet<DatabaseSequenceSpectrumMatch>[] RunSearch(IEnumerable<AnnotationAndOffset> annotationsAndOffsets, ISequenceFilter ms1Filter, bool isDecoy)
        {
            var sw = new Stopwatch();
            var numPeptides = 0;
            sw.Reset();
            sw.Start();

            var matches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];

            // TODO: N-term Met cleavage
            foreach (var annotationAndOffset in annotationsAndOffsets)
            {
                ++numPeptides;

                var annotation = annotationAndOffset.Annotation;
                var offset = annotationAndOffset.Offset;

                if (numPeptides % 100000 == 0)
                {
                    Console.Write(@"Processing {0}{1} peptides...", numPeptides,
                        numPeptides == 1 ? "st" : numPeptides == 2 ? "nd" : numPeptides == 3 ? "rd" : "th");
                    if (numPeptides != 0)
                    {
                        sw.Stop();
                        var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                        Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);
                        sw.Reset();
                        sw.Start();
                    }
                }

                var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, annotation);
                if (seqGraph == null)
                {
                    //                    Console.WriteLine("Ignoring illegal protein: {0}", annotation);
                    continue;
                }
                

                //var protCompositions = seqGraph.GetSequenceCompositions();
                var numProteoforms = seqGraph.GetNumProteoformCompositions();
                var modCombs = seqGraph.GetModificationCombinations();
                for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
                {
                    seqGraph.SetSink(modIndex);
                    var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
                    var sequenceMass = protCompositionWithH2O.Mass;
                    var modCombinations = modCombs[modIndex];

                    foreach (var ms2ScanNum in ms1Filter.GetMatchingMs2ScanNums(sequenceMass))
                    {
                        var spec = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                        if (spec == null) continue;
                        var charge =
                            (int)Math.Round(sequenceMass / (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));
                        var scorer = _ms2ScorerFactory.GetMs2Scorer(ms2ScanNum);
                        var score = seqGraph.GetFragmentScore(scorer);
                        if (score <= 2) continue;

                        var precursorIon = new Ion(protCompositionWithH2O, charge);
                        var sequence = annotation.Substring(2, annotation.Length - 4);
                        var pre = annotation[0];
                        var post = annotation[annotation.Length - 1];
                        var prsm = new DatabaseSequenceSpectrumMatch(sequence, pre, post, ms2ScanNum, offset, 0, modCombinations,
                            precursorIon, score, isDecoy);

                        if (matches[ms2ScanNum] == null)
                        {
                            matches[ms2ScanNum] = new SortedSet<DatabaseSequenceSpectrumMatch> { prsm };
                        }
                        else // already exists
                        {
                            var existingMatches = matches[ms2ScanNum];
                            if (existingMatches.Count < NumMatchesPerSpectrum) existingMatches.Add(prsm);
                            else
                            {
                                var minScore = existingMatches.Min.Score;
                                if (score > minScore)
                                {
                                    existingMatches.Add(prsm);
                                    existingMatches.Remove(existingMatches.Min);
                                }
                            }
                        }
                    }
                }
            }

            return matches;
        }
Ejemplo n.º 3
0
        public void FilteringEfficiencyQcShew()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

            sw.Stop();

            Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const int minPrecursorCharge = 3;
            const int maxPrecursorCharge = 30;
            const int tolerancePpm       = 10;
            var       tolerance          = new Tolerance(tolerancePpm);

            sw.Reset();
            sw.Start();
            var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40);

            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

            sw.Stop();

            Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

            ISequenceFilter ms1Filter = ms1BasedFilter;

            sw.Reset();
            sw.Start();
            const double minProteinMass = 3000.0;
            const double maxProteinMass = 30000.0;
            var          minBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
            var          maxBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
            var          numComparisons = 0L;

            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
                numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
            }
            sw.Stop();

            Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

            //const string prot =
            //    "ADVFHLGLTKAMLDGATLAIVPGDPERVKRIAELMDNATFLASHREYTSYLAYADGKPVVICSTGIGGPSTSIAVEELAQLGVNTFLRVGTTGAIQPHVNVGDVIVTQASVRLDGASLHFAPMEFPAVANFECTTAMVAACRDAGVEPHIGVTASSDTFYPGQERYDTVTGRVTRRFAGSMKEWQDMGVLNYEMESATLFTMCATQGWRAACVAGVIVNRTQQEIPDEATMKKTEVSAVSIVVAAAKKLLA";
            //var protMass = (new AminoAcidSet().GetComposition(prot) + Composition.H2O).Mass;
            //Console.WriteLine("************ScanNums: " + string.Join("\t", ms1Filter.GetMatchingMs2ScanNums(protMass)));

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var tsvReader = new TsvFileParser(resultFilePath);
            var scanNums  = tsvReader.GetData("Scan(s)");
            var charges   = tsvReader.GetData("Charge");
            var scores    = tsvReader.GetData("E-value");
            var sequences = tsvReader.GetData("Peptide");

            //const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_N30_C30.tsv";
            //var tsvReader = new TsvFileParser(resultFilePath);
            //var scanNums = tsvReader.GetData("ScanNum");
            //var charges = tsvReader.GetData("Charge");
            //var scores = tsvReader.GetData("Score");
            //var sequences = tsvReader.GetData("Sequence");

            var aaSet = new AminoAcidSet();

            var seqSet             = new HashSet <string>();
            var allSeqSet          = new HashSet <string>();
            var numUnfilteredSpecs = 0;
            var totalSpecs         = 0;

            for (var i = 0; i < scores.Count; i++)
            {
                var score = Convert.ToDouble(scores[i]);
                if (score > 1E-4)
                {
                    continue;
                }
                //if (score < 10) continue;

                var scanNum = Convert.ToInt32(scanNums[i]);
                var charge  = Convert.ToInt32(charges[i]);

                var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[i]);
                if (sequence == null || sequence.Contains("("))
                {
                    continue;
                }
                //var sequence = sequences[i];
                var composition = aaSet.GetComposition(sequence) + Composition.H2O;

                var precursorIon = new Ion(composition, charge);
                var isValid      = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid)
                {
                    continue;
                }
                ++totalSpecs;

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec    = run.GetSpectrum(precursorScanNum);
                var corr1            = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec    = run.GetSpectrum(nextScanNum);
                var corr2       = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
                if (corr3 == 1)
                {
                    numUnfilteredSpecs++;
                    seqSet.Add(sequences[i]);
                }
                allSeqSet.Add(sequences[i]);

                var corrMax = new[] { corr1, corr2, corr3 }.Max();

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax);
            }

            Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
            Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
            Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
            Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 4
0
        public void FilteringEfficiency()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

            sw.Stop();

            Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const int minPrecursorCharge = 3;
            const int maxPrecursorCharge = 30;
            const int tolerancePpm       = 10;
            var       tolerance          = new Tolerance(tolerancePpm);

            sw.Reset();
            sw.Start();
            //var ms1BasedFilter = new Ms1BasedFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm);
            //
            //var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 20);
            //var ms1BasedFilter = new ProductScorerBasedOnDeconvolutedSpectra(run,
            //    minPrecursorCharge, maxPrecursorCharge,
            //    0, 0,
            //    600.0, 1800.0, new Tolerance(tolerancePpm), null);
            //ms1BasedFilter.CachePrecursorMatchesBinCentric();
            var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.5, 0.5, 0.5, 40);

            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

            sw.Stop();

            Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

            ISequenceFilter ms1Filter = ms1BasedFilter;

            sw.Reset();
            sw.Start();
            const double minProteinMass = 3000.0;
            const double maxProteinMass = 30000.0;
            var          minBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
            var          maxBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
            var          numComparisons = 0L;

            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
                numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
            }
            sw.Stop();

            Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon_4PTMs.icresult";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var tsvReader    = new TsvFileParser(resultFilePath);
            var compositions = tsvReader.GetData("Composition");
            var scanNums     = tsvReader.GetData("ScanNum");
            var charges      = tsvReader.GetData("Charge");
            var scores       = tsvReader.GetData("Score");
            var qvalues      = tsvReader.GetData("QValue");
            var sequences    = tsvReader.GetData("Sequence");

            var sequenceCount = new Dictionary <string, int>();

            for (var i = 0; i < compositions.Count; i++)
            {
                if (qvalues != null)
                {
                    var qValue = Convert.ToDouble(qvalues[i]);
                    if (qValue > 0.01)
                    {
                        continue;
                    }
                }
                else
                {
                    var score = Convert.ToDouble(scores[i]);
                    if (score < 13)
                    {
                        continue;
                    }
                }
                var scanNum      = Convert.ToInt32(scanNums[i]);
                var charge       = Convert.ToInt32(charges[i]);
                var composition  = Composition.Parse(compositions[i]);
                var precursorIon = new Ion(composition, charge);
                var isValid      = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid)
                {
                    continue;
                }

                var sequence = sequences[i];
                if (sequenceCount.TryGetValue(sequence, out var count))
                {
                    sequenceCount[sequence] = count + 1;
                }
                else
                {
                    sequenceCount[sequence] = 1;
                }
            }
            //var sequences = tsvReader.GetData("Annotation");

            var seqSet             = new HashSet <string>();
            var allSeqSet          = new HashSet <string>();
            var numUnfilteredSpecs = 0;
            var totalSpecs         = 0;

            for (var i = 0; i < compositions.Count; i++)
            {
                if (qvalues != null)
                {
                    var qValue = Convert.ToDouble(qvalues[i]);
                    if (qValue > 0.01)
                    {
                        continue;
                    }
                }
                else
                {
                    var score = Convert.ToDouble(scores[i]);
                    if (score < 13)
                    {
                        continue;
                    }
                }
                var scanNum      = Convert.ToInt32(scanNums[i]);
                var charge       = Convert.ToInt32(charges[i]);
                var composition  = Composition.Parse(compositions[i]);
                var precursorIon = new Ion(composition, charge);
                var isValid      = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid)
                {
                    continue;
                }

                ++totalSpecs;

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec    = run.GetSpectrum(precursorScanNum);
                var corr1            = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec    = run.GetSpectrum(nextScanNum);
                var corr2       = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
                if (corr3 == 1)
                {
                    numUnfilteredSpecs++;
                    seqSet.Add(sequences[i]);
                }
                allSeqSet.Add(sequences[i]);

                //var xic = run.GetFullPrecursorIonExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance);
                ////xic.Display();
                //var apexScanNum = xic.GetNearestApexScanNum(run.GetPrecursorScanNum(scanNum), false);
                //var apexSpec = run.GetSpectrum(apexScanNum);
                //var corr3 = apexSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corrMax = new[] { corr1, corr2, corr3 }.Max();

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax, sequenceCount[sequences[i]]);
            }

            Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
            Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
            Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
            Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 5
0
        private void RunSearch(SortedSet<DatabaseSequenceSpectrumMatch>[] matches, FastaDatabase db, ISequenceFilter sequenceFilter, CancellationToken? cancellationToken = null, IProgress<ProgressData> progress = null)
        {
            var progData = new ProgressData(progress)
            {
                Status = "Searching for matches"
            };

            var sw = new Stopwatch();
            long estimatedProteins;
            var annotationsAndOffsets = GetAnnotationsAndOffsets(db, out estimatedProteins, cancellationToken);
            Console.WriteLine(@"Estimated proteins: " + estimatedProteins);

            var numProteins = 0;
            var lastUpdate = DateTime.MinValue; // Force original update of 0%

            sw.Reset();
            sw.Start();
            var pfeOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken = cancellationToken ?? CancellationToken.None
            };

            var maxNumNTermCleavages = SearchMode == InternalCleavageType.NoInternalCleavage ? MaxNumNTermCleavages : 0;
            //foreach (var annotationAndOffset in annotationsAndOffsets)
            Parallel.ForEach(annotationsAndOffsets, pfeOptions, annotationAndOffset =>
            {
                if (cancellationToken != null && cancellationToken.Value.IsCancellationRequested)
                {
                    //return matches;
                    return;
                }

                SearchProgressReport(ref numProteins, ref lastUpdate, estimatedProteins, sw, progData);
                SearchForMatches(annotationAndOffset, sequenceFilter, matches, maxNumNTermCleavages, db.IsDecoy, cancellationToken);
            });

            Console.WriteLine(@"Collected candidate matches: {0}", GetNumberOfMatches(matches));

            progData.StatusInternal = string.Empty;
            progData.Report(100.0);
        }
Ejemplo n.º 6
0
        private void SearchForMatches(AnnotationAndOffset annotationAndOffset,
            ISequenceFilter sequenceFilter, SortedSet<DatabaseSequenceSpectrumMatch>[] matches, int maxNumNTermCleavages, bool isDecoy, CancellationToken? cancellationToken = null)
        {
            var pfeOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken = cancellationToken ?? CancellationToken.None
            };
            
            var annotation = annotationAndOffset.Annotation;
            var offset = annotationAndOffset.Offset;
            //var protein = db.GetProteinName(offset);
            var protSequence = annotation.Substring(2, annotation.Length - 4);
            var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, AminoAcid.ProteinNTerm, protSequence,
                AminoAcid.ProteinCTerm);

            if (seqGraph == null) return; // No matches will be found without a sequence graph.

            for (var numNTermCleavages = 0; numNTermCleavages <= maxNumNTermCleavages; numNTermCleavages++)
            {
                if (numNTermCleavages > 0) seqGraph.CleaveNTerm();
                var numProteoforms = seqGraph.GetNumProteoformCompositions();
                var modCombs = seqGraph.GetModificationCombinations();
                for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
                {
                    seqGraph.SetSink(modIndex);
                    var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
                    var sequenceMass = protCompositionWithH2O.Mass;

                    if (sequenceMass < MinSequenceMass || sequenceMass > MaxSequenceMass) continue;

                    var modCombinations = modCombs[modIndex];
                    var ms2ScanNums = this.ScanNumbers ?? sequenceFilter.GetMatchingMs2ScanNums(sequenceMass);

                    Parallel.ForEach(ms2ScanNums, pfeOptions, ms2ScanNum =>
                    {
                        if (ms2ScanNum > _ms2ScanNums.Last() || ms2ScanNum < _ms2ScanNums.First()) return;
                        
                        var scorer = _ms2ScorerFactory2.GetMs2Scorer(ms2ScanNum);
                        var score = seqGraph.GetFragmentScore(scorer);
                        var isoTargetMz = _isolationWindowTargetMz[ms2ScanNum];
                        if (!(isoTargetMz > 0)) return;
                        var charge = (int)Math.Round(sequenceMass / (isoTargetMz - Constants.Proton));

                        var precursorIon = new Ion(protCompositionWithH2O, charge);
                        var sequence = protSequence.Substring(numNTermCleavages);
                        var pre = numNTermCleavages == 0 ? annotation[0] : annotation[numNTermCleavages + 1];
                        var post = annotation[annotation.Length - 1];
                        var prsm = new DatabaseSequenceSpectrumMatch(sequence, pre, post, ms2ScanNum, offset, numNTermCleavages,
                            modCombinations, precursorIon, score, isDecoy);
                        
                        AddMatch(matches, ms2ScanNum, prsm);
                    });
                }
            }
        }
Ejemplo n.º 7
0
 public LcMsPeakMatrix(LcMsRun run, ISequenceFilter ms1Filter, LcMsFeatureLikelihood scorer = null, int minScanCharge = 1, int maxScanCharge = 60,
     int maxThreadCount = 0)
     : this(run, scorer, minScanCharge, maxScanCharge, maxThreadCount)
 {
     _ms1Filter = ms1Filter;
 }
Ejemplo n.º 8
0
        private SortedSet <DatabaseSequenceSpectrumMatch>[] RunSearch(IEnumerable <AnnotationAndOffset> annotationsAndOffsets, ISequenceFilter ms1Filter, bool isDecoy)
        {
            var sw          = new Stopwatch();
            var numPeptides = 0;

            sw.Reset();
            sw.Start();

            var matches = new SortedSet <DatabaseSequenceSpectrumMatch> [_run.MaxLcScan + 1];

            // TODO: N-term Met cleavage
            foreach (var annotationAndOffset in annotationsAndOffsets)
            {
                ++numPeptides;

                var annotation = annotationAndOffset.Annotation;
                var offset     = annotationAndOffset.Offset;

                if (numPeptides % 100000 == 0)
                {
                    Console.Write(@"Processing {0}{1} peptides...", numPeptides,
                                  numPeptides == 1 ? "st" : numPeptides == 2 ? "nd" : numPeptides == 3 ? "rd" : "th");
                    if (numPeptides != 0)
                    {
                        sw.Stop();
                        var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                        Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);
                        sw.Reset();
                        sw.Start();
                    }
                }

                var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, annotation);
                if (seqGraph == null)
                {
                    //                    Console.WriteLine("Ignoring illegal protein: {0}", annotation);
                    continue;
                }

                //var protCompositions = seqGraph.GetSequenceCompositions();
                var numProteoforms = seqGraph.GetNumProteoformCompositions();
                var modCombs       = seqGraph.GetModificationCombinations();
                for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
                {
                    seqGraph.SetSink(modIndex);
                    var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
                    var sequenceMass           = protCompositionWithH2O.Mass;
                    var modCombinations        = modCombs[modIndex];

                    foreach (var ms2ScanNum in ms1Filter.GetMatchingMs2ScanNums(sequenceMass))
                    {
                        var spec = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                        if (spec == null)
                        {
                            continue;
                        }
                        var charge =
                            (int)Math.Round(sequenceMass / (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));
                        var scorer = _ms2ScorerFactory.GetMs2Scorer(ms2ScanNum);
                        var score  = seqGraph.GetFragmentScore(scorer);
                        if (score <= 2)
                        {
                            continue;
                        }

                        var precursorIon = new Ion(protCompositionWithH2O, charge);
                        var sequence     = annotation.Substring(2, annotation.Length - 4);
                        var pre          = annotation[0];
                        var post         = annotation[annotation.Length - 1];
                        var prsm         = new DatabaseSequenceSpectrumMatch(sequence, pre, post, ms2ScanNum, offset, 0, modCombinations,
                                                                             precursorIon, score, isDecoy);

                        if (matches[ms2ScanNum] == null)
                        {
                            matches[ms2ScanNum] = new SortedSet <DatabaseSequenceSpectrumMatch> {
                                prsm
                            };
                        }
                        else // already exists
                        {
                            var existingMatches = matches[ms2ScanNum];
                            if (existingMatches.Count < NumMatchesPerSpectrum)
                            {
                                existingMatches.Add(prsm);
                            }
                            else
                            {
                                var minScore = existingMatches.Min.Score;
                                if (score > minScore)
                                {
                                    existingMatches.Add(prsm);
                                    existingMatches.Remove(existingMatches.Min);
                                }
                            }
                        }
                    }
                }
            }

            return(matches);
        }
        /// <summary>
        /// <see cref="ISequenceFilter{T, TContext}"/> をフィルターパイプラインに追加します。
        /// </summary>
        /// <typeparam name="T">フィルター処理の対象となる要素の型。</typeparam>
        /// <typeparam name="TContext">フィルター処理のコンテキストを表す型。</typeparam>
        /// <param name="pipeline"><see cref="IPipelineBuilder{T, TContext}"/> インスタンス。常に非 <c>null</c>。</param>
        /// <param name="filter"><typeparamref name="T"/> のシーケンスをフィルター処理する <see cref="ISequenceFilter{T, TContext}"/> インスタンス。常に非 <c>null</c>。</param>
        /// <returns>自身の <see cref="IPipelineBuilder{T, TContext}"/> インスタンス。常に非 <c>null</c>。</returns>
        public static IPipelineBuilder <T, TContext> Add <T, TContext>(this IPipelineBuilder <T, TContext> pipeline, ISequenceFilter <T, TContext> filter)
        {
            Debug.Assert(pipeline != null);
            Debug.Assert(filter != null);

            return(pipeline.Add(filter.ExecuteAsync));
        }
Ejemplo n.º 10
0
        private void SearchForMatches(AnnotationAndOffset annotationAndOffset,
                                      ISequenceFilter sequenceFilter, SortedSet <DatabaseSequenceSpectrumMatch>[] matches, int maxNumNTermCleavages, bool isDecoy, CancellationToken?cancellationToken = null)
        {
            var pfeOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken      = cancellationToken ?? CancellationToken.None
            };

            var annotation = annotationAndOffset.Annotation;
            var offset     = annotationAndOffset.Offset;
            //var protein = db.GetProteinName(offset);
            var protSequence = annotation.Substring(2, annotation.Length - 4);
            var seqGraph     = SequenceGraph.CreateGraph(AminoAcidSet, AminoAcid.ProteinNTerm, protSequence,
                                                         AminoAcid.ProteinCTerm);

            if (seqGraph == null)
            {
                return;                   // No matches will be found without a sequence graph.
            }
            for (var numNTermCleavages = 0; numNTermCleavages <= maxNumNTermCleavages; numNTermCleavages++)
            {
                if (numNTermCleavages > 0)
                {
                    seqGraph.CleaveNTerm();
                }
                var numProteoforms = seqGraph.GetNumProteoformCompositions();
                var modCombs       = seqGraph.GetModificationCombinations();
                for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
                {
                    seqGraph.SetSink(modIndex);
                    var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
                    var sequenceMass           = protCompositionWithH2O.Mass;

                    if (sequenceMass < MinSequenceMass || sequenceMass > MaxSequenceMass)
                    {
                        continue;
                    }

                    var modCombinations = modCombs[modIndex];
                    var ms2ScanNums     = this.ScanNumbers ?? sequenceFilter.GetMatchingMs2ScanNums(sequenceMass);

                    Parallel.ForEach(ms2ScanNums, pfeOptions, ms2ScanNum =>
                    {
                        if (ms2ScanNum > _ms2ScanNums.Last() || ms2ScanNum < _ms2ScanNums.First())
                        {
                            return;
                        }

                        var scorer      = _ms2ScorerFactory2.GetMs2Scorer(ms2ScanNum);
                        var score       = seqGraph.GetFragmentScore(scorer);
                        var isoTargetMz = _isolationWindowTargetMz[ms2ScanNum];
                        if (!(isoTargetMz > 0))
                        {
                            return;
                        }
                        var charge = (int)Math.Round(sequenceMass / (isoTargetMz - Constants.Proton));

                        var precursorIon = new Ion(protCompositionWithH2O, charge);
                        var sequence     = protSequence.Substring(numNTermCleavages);
                        var pre          = numNTermCleavages == 0 ? annotation[0] : annotation[numNTermCleavages + 1];
                        var post         = annotation[annotation.Length - 1];
                        var prsm         = new DatabaseSequenceSpectrumMatch(sequence, pre, post, ms2ScanNum, offset, numNTermCleavages,
                                                                             modCombinations, precursorIon, score, isDecoy);

                        AddMatch(matches, ms2ScanNum, prsm);
                    });
                }
            }
        }
Ejemplo n.º 11
0
        private void RunSearch(SortedSet <DatabaseSequenceSpectrumMatch>[] matches, FastaDatabase db, ISequenceFilter sequenceFilter, CancellationToken?cancellationToken = null, IProgress <ProgressData> progress = null)
        {
            var progData = new ProgressData(progress)
            {
                Status = "Searching for matches"
            };

            var  sw = new Stopwatch();
            long estimatedProteins;
            var  annotationsAndOffsets = GetAnnotationsAndOffsets(db, out estimatedProteins, cancellationToken);

            Console.WriteLine(@"Estimated proteins: " + estimatedProteins);

            var numProteins = 0;
            var lastUpdate  = DateTime.MinValue; // Force original update of 0%

            sw.Reset();
            sw.Start();
            var pfeOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken      = cancellationToken ?? CancellationToken.None
            };

            var maxNumNTermCleavages = SearchMode == InternalCleavageType.NoInternalCleavage ? MaxNumNTermCleavages : 0;

            //foreach (var annotationAndOffset in annotationsAndOffsets)
            Parallel.ForEach(annotationsAndOffsets, pfeOptions, annotationAndOffset =>
            {
                if (cancellationToken != null && cancellationToken.Value.IsCancellationRequested)
                {
                    //return matches;
                    return;
                }

                SearchProgressReport(ref numProteins, ref lastUpdate, estimatedProteins, sw, progData);
                SearchForMatches(annotationAndOffset, sequenceFilter, matches, maxNumNTermCleavages, db.IsDecoy, cancellationToken);
            });

            Console.WriteLine(@"Collected candidate matches: {0}", GetNumberOfMatches(matches));

            progData.StatusInternal = string.Empty;
            progData.Report(100.0);
        }