示例#1
0
        /// <summary>
        /// Runs <c>InformedTopDownScorer.GetScores</c> for one fixed sequence, charge and scan of a .raw test file.
        /// The test is ignored when the data file cannot be found.
        /// </summary>
        /// <remarks>
        /// The returned scores are not asserted on; the test only exercises the scoring call.
        /// </remarks>
        public void TestSumMs2Spectra()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(testName);

            var rawFilePath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"TestYufengData\NewQC_LongSep_29Sep14_141001104925.raw");
            if (File.Exists(rawFilePath) == false)
            {
                Assert.Ignore(@"Skipping test " + testName + @" since file not found: " + rawFilePath);
            }

            // Fixed inputs for the single spectrum match that gets scored
            const int    targetScan      = 1289;
            const int    precursorCharge = 6;
            const string proteinSequence = "EIRGYRPPEPYKGKGVRYDDEEVRRKEAKKK";

            var aminoAcids = new AminoAcidSet();
            var lcmsRun    = PbfLcMsRun.GetLcMsRun(rawFilePath);

            // The upper product-ion charge passed to the scorer is one less than the precursor charge
            var topDownScorer = new InformedTopDownScorer(lcmsRun, aminoAcids, 1, precursorCharge - 1, new Tolerance(10));

            var precursorComposition = Composition.Parse("C(166) H(270) N(52) O(49) S(0)");
            topDownScorer.GetScores(AminoAcid.ProteinNTerm, proteinSequence, AminoAcid.ProteinCTerm,
                                    precursorComposition, precursorCharge, targetScan);
        }
示例#2
0
        /// <summary>
        /// Loads the spectrum file into an in-memory run, builds the top-down scorer from it,
        /// and then immediately calls <c>Rescore</c> with the MS-Align result path and the output path.
        /// </summary>
        /// <param name="specFilePath">Path of the spectrum file handed to <c>InMemoryLcMsRun.GetLcMsRun</c>.</param>
        /// <param name="msAlignFilePath">Path of the MS-Align result file, forwarded to <c>Rescore</c>.</param>
        /// <param name="outputFilePath">Output path, forwarded to <c>Rescore</c>.</param>
        /// <param name="tolerance">Tolerance forwarded to the scorer.</param>
        /// <param name="ms2CorrThreshold">Threshold forwarded to the scorer (default 0.7).</param>
        /// <param name="minProductIonCharge">Minimum product ion charge forwarded to the scorer (default 1).</param>
        /// <param name="maxProductIonCharge">Maximum product ion charge forwarded to the scorer (default 10).</param>
        public MsAlignRescorer(
            string specFilePath,
            string msAlignFilePath,
            string outputFilePath,
            Tolerance tolerance,
            double ms2CorrThreshold = 0.7,
            int minProductIonCharge = 1,
            int maxProductIonCharge = 10)
        {
            // The same value is supplied for both numeric arguments of GetLcMsRun
            const double runLoadArgument = 1.4826;

            var lcmsRun    = InMemoryLcMsRun.GetLcMsRun(specFilePath, runLoadArgument, runLoadArgument);
            var aminoAcids = new AminoAcidSet();

            _topDownScorer = new InformedTopDownScorer(
                lcmsRun,
                aminoAcids,
                minProductIonCharge,
                maxProductIonCharge,
                tolerance,
                ms2CorrThreshold);

            Rescore(msAlignFilePath, outputFilePath);
        }
        /// <summary>
        /// Rescores one fixed sequence/charge/scan combination from a local .pbf file with
        /// <c>InformedTopDownScorer</c> and prints the total score and the number of matched fragments.
        /// </summary>
        /// <remarks>
        /// The spectrum file is a hard-coded absolute path, so the test is ignored (rather than failing)
        /// on machines where that file does not exist.
        /// </remarks>
        public void TestRescoring()
        {
            const string specFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            // Guard against the machine-specific data file being absent
            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test TestRescoring since file not found: " + specFilePath);
            }

            const string sequence = "SKTKHPLPEQWQKNQEAAKATQVAFDLDEKFQYSIRKAALDAGVSPSDQIRTILGLSVSRRPTRPRLTVSLNADDYVQLAEKYDLNADAQLEIKRRVLEDLVRFVAED";
            const int    scanNum  = 5448;
            const int    charge   = 11;

            // Configure amino acid set
            var acetylN      = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var oxM          = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC     = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List<SearchModification>
            {
                dehydroC,
                glutathioneC,
                oxM,
                acetylN,
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

            // The composition passed to the scorer is the residue composition plus one water
            var composition = aaSet.GetComposition(sequence) + Composition.H2O;

            var run            = PbfLcMsRun.GetLcMsRun(specFilePath, 0, 0);
            var informedScorer = new InformedTopDownScorer(run, aaSet, 1, 15, new Tolerance(10));
            var scores         = informedScorer.GetScores(AminoAcid.ProteinNTerm, sequence, AminoAcid.ProteinCTerm, composition, charge, scanNum);

            Console.WriteLine("Total Score = " + scores.Score);
            Console.WriteLine("#Fragments = " + scores.NumMatchedFrags);
        }
示例#4
0
        /// <summary>
        /// Rescores the given sequence against one scan of the shared .pbf test file and checks the
        /// resulting total score.
        /// </summary>
        /// <param name="scanNum">Scan number passed to the scorer.</param>
        /// <param name="charge">Charge passed to the scorer.</param>
        /// <param name="sequence">Protein sequence to score.</param>
        /// <param name="expectedScore">Expected total score; compared with a tolerance of 0.0001.</param>
        public void TestRescoring(int scanNum, int charge, string sequence, double expectedScore)
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(testName);

            var testFile = Utils.GetTestFile(testName, Utils.GetPbfTestFilePath(false));

            // Search modifications used to build the amino acid set
            var nTermAcetylation = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var metOxidation     = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var cysDehydro       = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var cysGlutathione   = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

            const int maxModsPerProtein = 4;
            var modifications = new List<SearchModification>();
            modifications.Add(cysDehydro);
            modifications.Add(cysGlutathione);
            modifications.Add(metOxidation);
            modifications.Add(nTermAcetylation);

            var aminoAcids = new AminoAcidSet(modifications, maxModsPerProtein);

            // The composition passed to the scorer is the residue composition plus one water
            var precursorComposition = aminoAcids.GetComposition(sequence) + Composition.H2O;

            var lcmsRun       = PbfLcMsRun.GetLcMsRun(testFile.FullName, 0, 0);
            var topDownScorer = new InformedTopDownScorer(lcmsRun, aminoAcids, 1, 15, new Tolerance(10));
            var result        = topDownScorer.GetScores(
                AminoAcid.ProteinNTerm,
                sequence,
                AminoAcid.ProteinCTerm,
                precursorComposition,
                charge,
                scanNum);

            Console.WriteLine("Total Score = " + result.Score);
            Console.WriteLine("#Fragments = " + result.NumMatchedFrags);

            Assert.AreEqual(expectedScore, result.Score, 0.0001);
        }
示例#5
0
        /// <summary>
        /// Rescores the rows of the "IcTarget" and "IcDecoy" result files with
        /// <c>InformedTopDownScorer</c>, computes a spectral E-value for each row from a generating
        /// function over the deconvoluted MS2 spectrum, and writes the first 15 columns of every row plus
        /// the new score and E-value to a "_Rescored.tsv" file.
        /// </summary>
        /// <remarks>
        /// All input and output paths are hard-coded; the test is ignored when the spectrum file is missing.
        /// Numeric columns are parsed and written with the invariant culture because the TSV files are
        /// machine-readable and must not depend on the regional settings of the machine running the test.
        /// </remarks>
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            // Configure amino acid set
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List<SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet    = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var          run = PbfLcMsRun.GetLcMsRun(rawFilePath);
            const double filteringWindowSize    = 1.1;
            const int    isotopeOffsetTolerance = 2;
            var          tolerance    = new Tolerance(10);
            const int    minCharge    = 1;
            const int    maxCharge    = 20;
            var          graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            var          aminoAcidSet = new AminoAcidSet();
            var          scorer       = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            // Culture used for every number read from or written to the TSV files
            var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

            var fileExt = new[] { "IcTarget", "IcDecoy" };

            foreach (var ext in fileExt)
            {
                var resultFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv", ext);
                var parser         = new TsvFileParser(resultFileName);
                var scans          = parser.GetData("Scan").Select(s => Convert.ToInt32(s, invariantCulture)).ToArray();
                var charges        = parser.GetData("Charge").Select(s => Convert.ToInt32(s, invariantCulture)).ToArray();
                var protSequences  = parser.GetData("Sequence").ToArray();
                var modStrs        = parser.GetData("Modifications").ToArray();
                var compositions   = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass       = parser.GetData("Mass").Select(s => Convert.ToDouble(s, invariantCulture)).ToArray();
                var outputFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv", ext);

                // Fetch the raw rows once instead of once per parallel iteration
                var rows = parser.GetRows();

                using (var writer = new StreamWriter(outputFileName))
                {
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

                    // Results are collected by row index so the output keeps the input order
                    var lines = new string[parser.NumData];

                    Parallel.For(0, parser.NumData, i =>
                    {
                        var scan         = scans[i];
                        var charge       = charges[i];
                        var protSequence = protSequences[i];
                        var modStr       = modStrs[i];
                        var sequence     = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);

                        // The file's composition column is expected to equal the sequence composition plus one water
                        Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));

                        var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;
                        Assert.True(ms2Spec != null);

                        var scores = scorer.GetScores(sequence, charge, scan);

                        var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge,
                                                                              isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

                        var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance,
                                                                                          comparer);
                        var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                        var gf = new GeneratingFunction(graph);
                        gf.ComputeGeneratingFunction();

                        var specEvalue = gf.GetSpectralEValue(scores.Score);

                        var items     = rows[i].Split('\t');
                        var newRowStr = string.Join("\t", items, 0, 15);

                        // Each iteration writes only its own slot, so no lock is needed here
                        lines[i] = string.Format(invariantCulture, "{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
                    });

                    foreach (var line in lines)
                    {
                        writer.WriteLine(line);
                    }
                }
                Console.WriteLine("Done");
            }
        }
示例#6
0
        /// <summary>
        /// Builds a sequence graph for one annotated protein with a custom "AddVal" modification allowed on
        /// every standard residue (plus N-terminal acetylation), then, for every proteoform composition of
        /// the graph, prints the fast fragment score and the refined <c>InformedTopDownScorer</c> result.
        /// </summary>
        /// <remarks>
        /// The spectrum file is a hard-coded absolute path; the test is ignored when it does not exist.
        /// </remarks>
        public void TestPrSm()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string specFilePath = @"D:\Research\Data\Jon\AH_SF_mouseliver_3-1_Intact_2_6Feb14_Bane_PL011402.raw";

            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
            }

            const int    ms2ScanNum = 19011;
            const int    charge     = 7;
            const string annotation = "_.SKVSFKITLTSDPRLPYKVLSVPESTPFTAVLKFAAEEFKVPAATSAIITNDGIGINPAQTAGNVFLKHGSELRIIPRDRVGSC._";

            var acetylN    = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, true);
            var modVal     = Modification.RegisterAndGetModification("AddVal", new Composition(5, 9, 1, 1, 0));
            var searchMods = AminoAcid.StandardAminoAcidCharacters.Select(residue => new SearchModification(modVal, residue, SequenceLocation.Everywhere, false)).ToList();

            searchMods.Add(acetylN);
            const int numMaxModsPerProtein = 1;
            var       aaSet = new AminoAcidSet(searchMods, numMaxModsPerProtein);

            var graph = SequenceGraph.CreateGraph(aaSet, annotation);

            Console.WriteLine("NumProteoforms: " + graph.GetNumProteoformCompositions());

            var run       = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);
            var ms2Scorer = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 15);

            // NOTE(review): the return value is discarded; presumably this call prepares the scorer that
            // GetMs2Scorer returns below - confirm before removing it
            ms2Scorer.GetScorer(ms2ScanNum);
            var scorer = ms2Scorer.GetMs2Scorer(ms2ScanNum);

            Assert.NotNull(scorer, "Scorer is null!");

            // Neither the refined scorer nor the bare sequence depends on the loop variable,
            // so both are created once up front
            var sequence       = SimpleStringProcessing.GetStringBetweenDots(annotation);
            var informedScorer = new InformedTopDownScorer(run, aaSet, 1, 30, new Tolerance(10));

            for (var i = 0; i < graph.GetNumProteoformCompositions(); i++)
            {
                graph.SetSink(i);
                Console.WriteLine("ModComb: " + graph.GetModificationCombinations()[i]);
                var score = graph.GetFragmentScore(scorer);
                Console.WriteLine("Fast search score: " + score);
                var composition = graph.GetSinkSequenceCompositionWithH2O();

                var refinedScore = informedScorer.GetScores(AminoAcid.ProteinNTerm, sequence, AminoAcid.ProteinCTerm, composition, charge, ms2ScanNum);
                Console.WriteLine("Modifications: {0}", refinedScore.Modifications);
                Console.WriteLine("Composition: {0}", composition);
                Console.WriteLine("RefinedScores: {0}", refinedScore);
            }
        }
示例#7
0
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var pbfFilePath = Utils.GetPbfTestFilePath(false);
            var pbfFile     = Utils.GetTestFile(methodName, pbfFilePath);

            // Configure amino acid set
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List <SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet    = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var          run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);
            const double filteringWindowSize    = 1.1;
            const int    isotopeOffsetTolerance = 2;
            var          tolerance    = new Tolerance(10);
            const int    minCharge    = 1;
            const int    maxCharge    = 20;
            var          graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            var          aminoAcidSet = new AminoAcidSet();
            //var scorer = new MatchedPeakPostScorer(tolerance, minCharge, maxCharge);
            var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            if (pbfFile.DirectoryName == null)
            {
                Assert.Ignore("Ignoring test since cannot determine the parent directory of " + pbfFile.FullName);
            }

            var fileExt = new[] { "IcTarget", "IcDecoy" };

            foreach (var ext in fileExt)
            {
                var resultFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}.tsv", ext);
                var parser         = new TsvFileParser(resultFileName);
                var scans          = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
                var charges        = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
                var protSequences  = parser.GetData("Sequence").ToArray();
                var modStrs        = parser.GetData("Modifications").ToArray();
                var compositions   = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass       = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

                var outputFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputFileName))
                {
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

                    var lines = new string[parser.NumData];

                    //for (var i = 0; i < parser.NumData; i++)
                    Parallel.For(0, 30, i =>
                    {
                        var scan         = scans[i];
                        var charge       = charges[i];
                        var protSequence = protSequences[i];
                        var modStr       = modStrs[i];
                        var sequence     = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                        // Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));

                        if (!(run.GetSpectrum(scan) is ProductSpectrum ms2Spec))
                        {
                            Console.WriteLine("Could not get the spectrum datafor scan {0}", scan);
                        }
示例#8
0
        /// <summary>
        /// Rescores up to the first 30 rows of the "IcTarget" and "IcDecoy" result files that sit next to
        /// the .pbf test file, computes a spectral E-value for each row from a generating function over the
        /// deconvoluted MS2 spectrum, and prints up to 20 of the rescored rows.
        /// </summary>
        /// <remarks>
        /// Only the header line is written to the "_Rescored.tsv" output file; the rescored rows are echoed
        /// to the console. Rows whose scan does not yield a <c>ProductSpectrum</c> are reported and skipped.
        /// </remarks>
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var pbfFilePath = Utils.GetPbfTestFilePath(false);
            var pbfFile     = Utils.GetTestFile(methodName, pbfFilePath);

            // Configure amino acid set
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List<SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet    = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var          run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);
            const double filteringWindowSize    = 1.1;
            const int    isotopeOffsetTolerance = 2;
            var          tolerance    = new Tolerance(10);
            const int    minCharge    = 1;
            const int    maxCharge    = 20;
            var          graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            var          aminoAcidSet = new AminoAcidSet();
            var          scorer       = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            if (pbfFile.DirectoryName == null)
            {
                Assert.Ignore("Ignoring test since cannot determine the parent directory of " + pbfFile.FullName);
            }

            // Upper limit on the number of rows rescored per result file
            const int maxRowsToRescore = 30;

            // Numbers in the TSV files are machine-readable, so parse them culture-invariantly
            var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

            var fileExt = new string[] { "IcTarget", "IcDecoy" };

            foreach (var ext in fileExt)
            {
                var resultFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}.tsv", ext);
                var parser         = new TsvFileParser(resultFileName);
                var scans          = parser.GetData("Scan").Select(s => Convert.ToInt32((string)s, invariantCulture)).ToArray();
                var charges        = parser.GetData("Charge").Select(s => Convert.ToInt32(s, invariantCulture)).ToArray();
                var protSequences  = parser.GetData("Sequence").ToArray();
                var modStrs        = parser.GetData("Modifications").ToArray();
                var compositions   = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass       = parser.GetData("Mass").Select(s => Convert.ToDouble(s, invariantCulture)).ToArray();

                var outputFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputFileName))
                {
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

                    var lines = new string[parser.NumData];

                    // Never index past the rows actually present in the result file
                    var rowsToRescore = Math.Min(maxRowsToRescore, parser.NumData);

                    Parallel.For(0, rowsToRescore, i =>
                    {
                        var scan         = scans[i];
                        var charge       = charges[i];
                        var protSequence = protSequences[i];
                        var modStr       = modStrs[i];
                        var sequence     = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                        // Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));
                        var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;

                        if (ms2Spec == null)
                        {
                            Console.WriteLine("Could not get the spectrum datafor scan {0}", scan);
                            return; // skip this row; lines[i] stays null and is filtered out below
                        }

                        var scores = scorer.GetScores(sequence, charge, scan);

                        var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge,
                                                                              isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

                        var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance,
                                                                                          comparer);
                        var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                        var gf = new GeneratingFunction(graph);
                        gf.ComputeGeneratingFunction();

                        var specEvalue = gf.GetSpectralEValue(scores.Score);

                        var rowStr    = parser.GetRows()[i];
                        var items     = rowStr.Split('\t').ToArray();
                        var newRowStr = string.Join("\t", items, 0, 15);

                        // Each iteration writes only its own slot of the array
                        lines[i] = string.Format("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
                    });

                    // Echo at most 20 rescored rows, skipping slots that were never filled
                    foreach (var line in (from item in lines where !string.IsNullOrWhiteSpace(item) select item).Take(20))
                    {
                        Console.WriteLine(line);
                    }
                }
                Console.WriteLine("Done");
            }
        }
示例#9
0
        /// <summary>
        /// Re-scores the candidate matches of every MS2 scan, keeps those whose score exceeds the composite
        /// scorer cutoff, computes a spectral E-value for each kept match in parallel, and returns the match
        /// with the smallest E-value for each scan.
        /// </summary>
        /// <param name="sortedMatches">Candidate matches indexed by scan number; entries may be null.</param>
        /// <param name="cancellationToken">Optional token handed to the parallel loop; none when null.</param>
        /// <param name="progress">Optional progress sink, wrapped in a <c>ProgressData</c>.</param>
        /// <returns>
        /// An array with the same length as <paramref name="sortedMatches"/>, indexed by scan number, holding
        /// the kept match with the smallest spectral E-value for each scan that has one (other entries stay null).
        /// Returns null when a scan that has candidate matches does not yield a <c>ProductSpectrum</c>.
        /// </returns>
        /// <exception cref="Exception">
        /// Any exception raised while processing a scan in the parallel phase is wrapped with a message
        /// naming the step that was running.
        /// </exception>
        private DatabaseSequenceSpectrumMatch[] RunGeneratingFunction(SortedSet<DatabaseSequenceSpectrumMatch>[] sortedMatches, CancellationToken? cancellationToken = null, IProgress<ProgressData> progress = null)
        {
            var progressData = new ProgressData(progress);
            progressData.Status = "Calculating spectral E-values for matches";

            // Create the per-scan score distribution cache on first use (one empty list per MS2 scan)
            if (_cachedScoreDistributions == null)
            {
                _cachedScoreDistributions = new LinkedList<Tuple<double, ScoreDistribution>>[_run.MaxLcScan + 1];
                foreach (var ms2Scan in _ms2ScanNums)
                {
                    _cachedScoreDistributions[ms2Scan] = new LinkedList<Tuple<double, ScoreDistribution>>();
                }
            }

            var timer    = new Stopwatch();
            var rescorer = new InformedTopDownScorer(_run, AminoAcidSet, MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);

            // Phase 1: rescore every candidate and count the ones that survive the score cutoff
            var  keptMatches       = new LinkedList<DatabaseSequenceSpectrumMatch>[sortedMatches.Length];
            long estimatedProteins = 0;

            foreach (var ms2Scan in _ms2ScanNums)
            {
                var candidates = sortedMatches[ms2Scan];
                if (candidates == null)
                {
                    continue;
                }

                var spectrum = _run.GetSpectrum(ms2Scan) as ProductSpectrum;
                if (spectrum == null)
                {
                    return null;
                }

                var keptForScan = keptMatches[ms2Scan];

                foreach (var candidate in candidates)
                {
                    var proteinSequence = candidate.Sequence;
                    var precursorIon    = candidate.Ion;

                    var rescored = rescorer.GetScores(spectrum, proteinSequence, precursorIon.Composition, precursorIon.Charge, ms2Scan);
                    if (rescored == null)
                    {
                        continue;
                    }

                    candidate.Score               = rescored.Score;
                    candidate.ModificationText    = rescored.Modifications;
                    candidate.NumMatchedFragments = rescored.NumMatchedFrags;

                    if (candidate.Score > CompositeScorer.ScoreParam.Cutoff)
                    {
                        // The list for a scan is only created once its first match passes the cutoff
                        if (keptForScan == null)
                        {
                            keptForScan          = new LinkedList<DatabaseSequenceSpectrumMatch>();
                            keptMatches[ms2Scan] = keptForScan;
                        }
                        keptForScan.AddLast(candidate);
                    }
                }

                if (keptForScan != null)
                {
                    estimatedProteins += keptForScan.Count;
                }
            }

            Console.WriteLine(@"Estimated matched proteins: " + estimatedProteins);

            // Phase 2: spectral E-values, computed in parallel over the scans that kept at least one match
            var numProteins = 0;
            var lastUpdate  = DateTime.MinValue; // Forces the initial 0% progress update

            timer.Restart();

            var scansWithMatches = _ms2ScanNums.Where(s => keptMatches[s] != null).ToArray();

            var parallelOptions = new ParallelOptions();
            parallelOptions.MaxDegreeOfParallelism = MaxNumThreads;
            parallelOptions.CancellationToken      = cancellationToken ?? CancellationToken.None;

            Parallel.ForEach(scansWithMatches, parallelOptions, scan =>
            {
                // Tracks the step in progress so a failure can be reported with context
                var activity = "?";
                try
                {
                    var cachedDistributions = _cachedScoreDistributions[scan];
                    foreach (var match in keptMatches[scan])
                    {
                        var mass    = match.Ion.Composition.Mass;
                        var context = "for scan " + scan + " and mass " + mass;

                        activity  = "Calling GetMs2ScoringGraph " + context;
                        var graph = _ms2ScorerFactory2.GetMs2ScoringGraph(scan, mass);
                        if (graph == null)
                        {
                            continue;
                        }

                        activity = "Calling ComputeGeneratingFunction " + context;

                        // Reuse a cached distribution whose mass lies within the precursor tolerance, if any
                        var distribution = cachedDistributions
                            .Where(entry => Math.Abs(entry.Item1 - mass) < PrecursorIonTolerance.GetToleranceAsTh(mass))
                            .Select(entry => entry.Item2)
                            .FirstOrDefault();

                        if (distribution == null)
                        {
                            var generatingFunction = new GeneratingFunction(graph);
                            generatingFunction.ComputeGeneratingFunction();
                            distribution = generatingFunction.GetScoreDistribution();
                            cachedDistributions.AddLast(new Tuple<double, ScoreDistribution>(mass, distribution));
                        }

                        activity         = "Calling GetSpectralEValue " + context + " and score " + (int)match.Score;
                        match.SpecEvalue = distribution.GetSpectralEValue(match.Score);

                        activity = "Reporting progress " + context;
                        SearchProgressReport(ref numProteins, ref lastUpdate, estimatedProteins, timer, progressData);
                    }
                }
                catch (Exception ex)
                {
                    var errMsg = string.Format("Exception while {0}: {1}", activity, ex.Message);
                    Console.WriteLine(errMsg);
                    throw new Exception(errMsg, ex);
                }
            });

            // Phase 3: keep only the match with the smallest spectral E-value for each scan
            var bestPerScan = new DatabaseSequenceSpectrumMatch[keptMatches.Length];
            foreach (var scoredScan in scansWithMatches)
            {
                bestPerScan[scoredScan] = keptMatches[scoredScan].OrderBy(m => m.SpecEvalue).First();
            }

            progressData.StatusInternal = string.Empty;
            progressData.Report(100.0);
            return bestPerScan;
        }