/// <summary>
/// Initializes a scorer factory: stores the supplied run, mass binning, product charge range
/// and tolerance, starts with an empty per-key scorer dictionary, and builds a
/// <c>ProteinScoringGraphFactory</c> from the mass binning and amino acid set.
/// </summary>
/// <param name="run">LC-MS run kept by the factory.</param>
/// <param name="comparer">Mass binning kept by the factory and passed to the scoring graph factory.</param>
/// <param name="aaSet">Amino acid set passed to the scoring graph factory.</param>
/// <param name="minProductCharge">Lower bound of the product charge range.</param>
/// <param name="maxProductCharge">Upper bound of the product charge range.</param>
/// <param name="productTolerance">Tolerance stored for product matching.</param>
/// <param name="isotopeOffsetTolerance">Value assigned to <c>IsotopeOffsetTolerance</c> (default 2).</param>
/// <param name="filteringWindowSize">Value assigned to <c>FilteringWindowSize</c> (default 1.1).</param>
public CompositeScorerFactory(
     ILcMsRun run,
     IMassBinning comparer,
     AminoAcidSet aaSet,
     int minProductCharge,
     int maxProductCharge,
     Tolerance productTolerance,
     int isotopeOffsetTolerance = 2,
     double filteringWindowSize = 1.1)
 {
     // Collaborators handed in by the caller.
     this._run = run;
     this._comparer = comparer;

     // Product ion matching settings.
     this._minProductCharge = minProductCharge;
     this._maxProductCharge = maxProductCharge;
     this._productTolerance = productTolerance;

     // Settings exposed through members of this class.
     this.FilteringWindowSize = filteringWindowSize;
     this.IsotopeOffsetTolerance = isotopeOffsetTolerance;

     // State owned by the factory itself.
     this._ms2Scorer = new Dictionary<int, IScorer>();
     this._scoringGraphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
 }
 /// <summary>
 /// Creates the factory from a run, a mass binning, an amino acid set and the product ion settings.
 /// The constructor only records its arguments, allocates an empty scorer dictionary and
 /// constructs the protein scoring graph factory.
 /// </summary>
 /// <param name="run">LC-MS run stored in <c>_run</c>.</param>
 /// <param name="comparer">Mass binning stored in <c>_comparer</c> and given to the scoring graph factory.</param>
 /// <param name="aaSet">Amino acid set given to the scoring graph factory.</param>
 /// <param name="minProductCharge">Stored in <c>_minProductCharge</c>.</param>
 /// <param name="maxProductCharge">Stored in <c>_maxProductCharge</c>.</param>
 /// <param name="productTolerance">Stored in <c>_productTolerance</c>.</param>
 /// <param name="isotopeOffsetTolerance">Assigned to <c>IsotopeOffsetTolerance</c>; defaults to 2.</param>
 /// <param name="filteringWindowSize">Assigned to <c>FilteringWindowSize</c>; defaults to 1.1.</param>
 public CompositeScorerFactory(
     ILcMsRun run,
     IMassBinning comparer,
     AminoAcidSet aaSet,
     int minProductCharge,
     int maxProductCharge,
     Tolerance productTolerance,
     int isotopeOffsetTolerance = 2,
     double filteringWindowSize = 1.1)
 {
     // Inputs kept as-is.
     _run = run;
     _comparer = comparer;
     _productTolerance = productTolerance;

     // Product charge range.
     _minProductCharge = minProductCharge;
     _maxProductCharge = maxProductCharge;

     // Optional settings (same assignment order as before).
     FilteringWindowSize = filteringWindowSize;
     IsotopeOffsetTolerance = isotopeOffsetTolerance;

     // Objects created here.
     _scoringGraphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
     _ms2Scorer = new Dictionary<int, IScorer>();
 }
Beispiel #3
0
        /// <summary>
        /// Builds the scoring graph that corresponds to the concrete type of the given scorer.
        /// </summary>
        /// <param name="scorer">
        /// The scorer to build a graph for. Supported types are
        /// <c>CompositeScorerBasedOnDeconvolutedSpectrum</c> and <c>FlipScorer&lt;DeconvolutedSpectrum&gt;</c>.
        /// </param>
        /// <param name="parentMass">The maximum mass of the scoring graph.</param>
        /// <returns>The scoring graph produced by the factory matching the scorer type.</returns>
        /// <exception cref="ArgumentException">The scorer is of neither supported type (including null).</exception>
        public IScoringGraph GetScoringGraph(IScorer scorer, double parentMass)
        {
            // Deconvoluted-spectrum composite scorers are served by the protein scoring graph factory.
            var deconvolutedScorer = scorer as CompositeScorerBasedOnDeconvolutedSpectrum;
            if (deconvolutedScorer != null)
            {
                var proteinGraphFactory = new ProteinScoringGraphFactory(this.massBins, this.aminoAcidSet);
                return proteinGraphFactory.CreateScoringGraph(deconvolutedScorer, parentMass);
            }

            // Flip scorers additionally need the amino acid probabilities.
            var flipScorer = scorer as FlipScorer<DeconvolutedSpectrum>;
            if (flipScorer != null)
            {
                var flipGraphFactory = new FlipScoringGraphFactory(this.massBins, this.aminoAcidSet, this.aminoAcidProbabilities);
                return flipGraphFactory.GetScoringGraph(flipScorer, parentMass);
            }

            throw new ArgumentException("No scoring graph exists for that scorer.");
        }
Beispiel #4
0
        /// <summary>
        /// Rescores the identifications listed in the "IcTarget" and "IcDecoy" result files:
        /// for every row it scores the sequence against its MS2 scan, builds a scoring graph from the
        /// deconvoluted spectrum, computes the generating function and writes the first 15 original
        /// columns plus the score and spectral E-value to a "_Rescored.tsv" file.
        /// The test is ignored when the raw file or a result file is not present on this machine.
        /// </summary>
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // Alternative location: \\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SpecFiles\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw
            const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            // Numbers read from and written to the TSV files must not depend on the machine locale.
            // NOTE(review): assumes the result files use '.' as the decimal separator - confirm.
            var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

            // Configure amino acid set
            var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var searchModifications = new List<SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
            const double filteringWindowSize = 1.1;
            const int isotopeOffsetTolerance = 2;
            var tolerance = new Tolerance(10);
            const int minCharge = 1;
            const int maxCharge = 20;
            var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);

            // The scorer and the sequences use a default amino acid set, while the mass binning and
            // the graph factory use the modification-aware set configured above.
            var aminoAcidSet = new AminoAcidSet();
            var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            var fileExt = new[] { "IcTarget", "IcDecoy" };
            foreach (var ext in fileExt)
            {
                var resultFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv", ext);
                if (!File.Exists(resultFileName))
                {
                    // Same policy as for the raw file: missing local data skips the test instead of failing it.
                    Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFileName);
                }

                var parser = new TsvFileParser(resultFileName);
                var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s, invariantCulture)).ToArray();
                var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s, invariantCulture)).ToArray();
                var protSequences = parser.GetData("Sequence").ToArray();
                var modStrs = parser.GetData("Modifications").ToArray();
                var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass = parser.GetData("Mass").Select(s => Convert.ToDouble(s, invariantCulture)).ToArray();
                var outputFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv", ext);

                // Fetch the rows once instead of once per parallel iteration.
                var rows = parser.GetRows();

                using (var writer = new StreamWriter(outputFileName))
                {
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

                    // Results are collected by row index so the output keeps the input order
                    // even though the rows are processed in parallel.
                    var lines = new string[parser.NumData];

                    Parallel.For(0, parser.NumData, i =>
                    {
                        var scan = scans[i];
                        var charge = charges[i];
                        var protSequence = protSequences[i];
                        var modStr = modStrs[i];
                        var sequence = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                        Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));
                        var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;
                        Assert.True(ms2Spec != null);
                        var scores = scorer.GetScores(sequence, charge, scan);

                        var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge,
                            isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

                        var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance,
                            comparer);
                        var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                        var gf = new GeneratingFunction(graph);
                        gf.ComputeGeneratingFunction();

                        var specEvalue = gf.GetSpectralEValue(scores.Score);

                        // Keep only the first 15 columns of the original row.
                        var items = rows[i].Split('\t');
                        var newRowStr = string.Join("\t", items, 0, 15);

                        // Each iteration writes only its own slot, so no lock is required.
                        lines[i] = string.Format(invariantCulture, "{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
                    });

                    foreach (var line in lines)
                    {
                        writer.WriteLine(line);
                    }
                }
                Console.WriteLine("Done");
            }
        }
        /// <summary>
        /// Builds the scoring graph for one fixed protein sequence against scan 5927 of the training
        /// run, computes its generating function, and prints the score range, the spectral E-value of
        /// every integer score from 45 up to the generating function's maximum score, and the elapsed
        /// time of each stage. The test is ignored when the id file or the raw file is missing.
        /// </summary>
        public void TestGetScoreDistribution()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);
            const string rawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
            const string idFileFolder = @"D:\MassSpecFiles\training\IdScoring\MSPF_trainset";

            const int scanNum = 5927;
            const string protSequence = "MNKSELIEKIASGADISKAAAGRALDSFIAAVTEGLKEGDKISLVGFGTFEVRERAERTGRNPQTGEEIKIAAAKIPAFKAGKALKDAVN";

            const string modStr = "";

            var idFile = string.Format(@"{0}\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv", idFileFolder);
            if (!File.Exists(idFile))
            {
                // Report the skip instead of returning silently, which would show up as a passing test.
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, idFile);
            }

            if (!File.Exists(rawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
            }

            const int maxCharge = 20;
            const int minCharge = 1;
            const double filteringWindowSize = 1.1;
            const int isotopeOffsetTolerance = 2;
            var tolerance = new Tolerance(10);
            var run = PbfLcMsRun.GetLcMsRun(rawFile);

            // Configure amino acid set
            var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var searchModifications = new List<SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            // Stage 1: constructing the graph factory (reported as "edge generation").
            var stopwatch = Stopwatch.StartNew();
            var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            stopwatch.Stop();
            Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            // stopwatch2 measures everything from here to the end of the test.
            var stopwatch2 = Stopwatch.StartNew();

            var sequence = Sequence.CreateSequence(protSequence, modStr, aaSet);
            var proteinMass = sequence.Mass + Composition.H2O.Mass;

            Console.WriteLine("Mass = {0}", proteinMass);

            var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
            // Fail with a clear assertion if the scan is not a product (MS2) spectrum.
            Assert.True(spectrum != null);
            var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(spectrum, minCharge, maxCharge,
                isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

            // Stage 2: scorer and scoring graph construction (reported as "node generation").
            stopwatch.Restart();
            var scorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, spectrum, tolerance, comparer);
            var graph = graphFactory.CreateScoringGraph(scorer, proteinMass);
            stopwatch.Stop();
            Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            // Stage 3: generating function computation.
            stopwatch.Restart();
            var gf = new GeneratingFunction(graph);
            gf.ComputeGeneratingFunction();
            stopwatch.Stop();
            Console.WriteLine(@"computing generation function = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            var scoreDist = gf.GetScoreDistribution();

            Console.WriteLine("{0}-{1}", scoreDist.MinScore, scoreDist.MaxScore);

            for (var score = 45; score <= gf.MaximumScore; score++)
            {
                var specEvalue = gf.GetSpectralEValue(score);
                Console.WriteLine("{0} : {1}", score, specEvalue);
            }

            stopwatch2.Stop();
            Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", (stopwatch2.ElapsedMilliseconds) / 1000.0d);
        }