예제 #1
0
        /// <summary>
        /// Computes a non-negative Pearson-style correlation between two spectra after
        /// filtering them and projecting them onto fixed-length intensity vectors.
        /// </summary>
        /// <param name="spectrum1">Peaks of the first spectrum.</param>
        /// <param name="spectrum2">Peaks of the second spectrum.</param>
        /// <param name="comparer">Mass binning used to turn peak m/z values into vector indices.</param>
        /// <returns>
        /// 0 when the covariance is negative or when either vector has no spread around its
        /// mean value; otherwise cov / sqrt(s1 * s2).
        /// </returns>
        public double PearsonCorrelation(Peak[] spectrum1, Peak[] spectrum2, FilteredProteinMassBinning comparer)
        {
            // Both spectra go through the same filter (defined elsewhere) before comparison.
            spectrum1 = GuassianFilter(spectrum1, .5);
            spectrum2 = GuassianFilter(spectrum2, .5);

            // The vector length is the bin number that the binning assigns to mass 10000.
            var vectorLength = comparer.GetBinNumber(10000.0);

            // Mean intensity per bin for each spectrum.
            // Fix: the second mean was previously computed from spectrum1 (copy-paste error).
            var spec1Bar = spectrum1.Sum(x => x.Intensity) / vectorLength;
            var spec2Bar = spectrum2.Sum(y => y.Intensity) / vectorLength;

            var intensityVector1 = ConvertToFullIntensityVector(spectrum1, vectorLength, comparer);
            var intensityVector2 = ConvertToFullIntensityVector(spectrum2, vectorLength, comparer);

            var cov = 0.0;
            var s1  = 0.0;
            var s2  = 0.0;

            for (var i = 0; i < vectorLength; i++)
            {
                var d1 = intensityVector1[i] - spec1Bar;
                var d2 = intensityVector2[i] - spec2Bar;
                cov += d1 * d2;
                s1  += d1 * d1;
                s2  += d2 * d2;
            }

            // Avoid dividing by zero when either vector is constant.
            if (s1 <= 0 || s2 <= 0)
            {
                return 0;
            }

            // Negative correlations are reported as 0.
            return cov < 0 ? 0d : cov / Math.Sqrt(s1 * s2);
        }
예제 #2
0
        /// <summary>
        /// Prints the bin number that a <see cref="FilteredProteinMassBinning"/> assigns to
        /// each whole-number mass from 9999 up to (but not including) 10010.
        /// </summary>
        public void TestProteinMassComparerWithBinning()
        {
            var binning = new FilteredProteinMassBinning(new AminoAcidSet(), 50001);

            var mass = 9999d;
            while (mass < 10010)
            {
                var bin = binning.GetBinNumber(mass);
                Console.WriteLine("{0}, {1}", mass, bin);
                mass++;
            }

            // A disabled experiment used to live here: it grew a random amino-acid mass ladder
            // and printed the bin numbers from Constants.GetBinNumHighPrecision, a
            // ProteinMassBinning(50, 50001, true) instance and this binning side by side.
        }
예제 #3
0
        /// <summary>
        /// Projects a peak list onto a dense intensity vector indexed by mass bin.
        /// </summary>
        /// <param name="spectrum">Peaks to project.</param>
        /// <param name="length">Number of elements in the returned vector.</param>
        /// <param name="comparer">Mass binning that maps a peak's m/z to a bin number.</param>
        /// <returns>
        /// A vector of <paramref name="length"/> elements in which element (bin number - 1)
        /// holds the intensity of the last peak that fell into that bin; all other elements are 0.
        /// </returns>
        public double[] ConvertToFullIntensityVector(Peak[] spectrum, int length, FilteredProteinMassBinning comparer)
        {
            // A newly allocated double[] is already zero-filled, so no explicit clear is needed.
            var intensityVector = new double[length];

            // NOTE(review): RemovePeaks is defined elsewhere; presumably it drops peaks beyond
            // the 10000.0 limit - confirm against its implementation.
            spectrum = RemovePeaks(10000.0, spectrum);

            foreach (var peak in spectrum)
            {
                // Bin numbers are shifted down by one to obtain the vector index.
                var index = comparer.GetBinNumber(peak.Mz) - 1;

                // Fix: skip anything that does not land inside the vector. Previously a bin
                // number of 0 produced index -1 and an out-of-range bin overran the array,
                // both of which threw IndexOutOfRangeException.
                if (index < 0 || index >= length)
                {
                    continue;
                }

                intensityVector[index] = peak.Intensity;
            }

            return intensityVector;
        }
예제 #4
0
        /// <summary>
        /// Builds every edge of the scoring graph ahead of time.
        /// </summary>
        /// <param name="aminoAcidSet">Amino acid set that supplies the residues and terminal modifications.</param>
        /// <param name="aminoAcidProbabilities">
        /// Probability per residue; its log10 becomes the edge weight. Residues that are not
        /// in the dictionary get weight 0.
        /// </param>
        /// <returns>All edges, ordered by sink bin and, within a bin, by insertion order.</returns>
        private List <FlipScoringGraphEdge> InitEdges(AminoAcidSet aminoAcidSet, Dictionary <char, double> aminoAcidProbabilities)
        {
            var binCount = this.massBins.NumberOfBins;

            // One bucket of incoming edges per sink bin.
            var edgesBySink = new LinkedList<FlipScoringGraphEdge>[binCount];
            for (var bin = 0; bin < binCount; bin++)
            {
                edgesBySink[bin] = new LinkedList<FlipScoringGraphEdge>();
            }

            var terminalMods = FilteredProteinMassBinning.GetTerminalModifications(aminoAcidSet);
            var residues     = FilteredProteinMassBinning.GetExtendedAminoAcidArray(aminoAcidSet);

            for (var source = 0; source < binCount; source++)
            {
                var sourceMass = this.massBins.GetMass(source);

                foreach (var residue in residues)
                {
                    var sink = this.massBins.GetBinNumber(sourceMass + residue.Mass);
                    if (sink < 0 || sink >= binCount)
                    {
                        continue;
                    }

                    double probability;
                    var weight = aminoAcidProbabilities.TryGetValue(residue.Residue, out probability)
                        ? Math.Log10(probability)
                        : 0;
                    edgesBySink[sink].AddLast(new FlipScoringGraphEdge(source, sink, weight, residue, null));

                    // Terminal modifications are only attached to unmodified residues leaving bin 0.
                    if (source != 0 || residue is ModifiedAminoAcid)
                    {
                        continue;
                    }

                    foreach (var terminalMod in terminalMods)
                    {
                        var modifiedResidue = new ModifiedAminoAcid(residue, terminalMod);
                        var modifiedSink    = this.massBins.GetBinNumber(sourceMass + modifiedResidue.Mass);
                        if (modifiedSink >= 0 && modifiedSink < binCount)
                        {
                            // The modified edge reuses the weight of the unmodified residue.
                            edgesBySink[modifiedSink].AddLast(new FlipScoringGraphEdge(source, modifiedSink, weight, modifiedResidue, null));
                        }
                    }
                }
            }

            // Flatten the buckets in bin order.
            var allEdges = new List<FlipScoringGraphEdge>();
            foreach (var bucket in edgesBySink)
            {
                allEdges.AddRange(bucket);
            }

            return allEdges;
        }
예제 #5
0
        /// <summary>
        /// Creates the factory and precomputes, for every mass bin, the list of edges that
        /// arrive in that bin when a single amino acid is added to another bin's mass.
        /// </summary>
        /// <param name="comparer">Mass binning that converts between masses and bin numbers.</param>
        /// <param name="aminoAcidSet">Amino acid set that supplies the residues and terminal modifications.</param>
        public ProteinScoringGraphFactory(IMassBinning comparer, AminoAcidSet aminoAcidSet)
        {
            _comparer = comparer;

            var binCount = _comparer.NumberOfBins;
            _adjList = new LinkedList<ScoringGraphEdge>[binCount];
            for (var bin = 0; bin < binCount; bin++)
            {
                _adjList[bin] = new LinkedList<ScoringGraphEdge>();
            }

            var terminalMods = FilteredProteinMassBinning.GetTerminalModifications(aminoAcidSet);
            var residues     = FilteredProteinMassBinning.GetExtendedAminoAcidArray(aminoAcidSet);

            for (var source = 0; source < binCount; source++)
            {
                var sourceMass = _comparer.GetMass(source);

                foreach (var residue in residues)
                {
                    var sink = _comparer.GetBinNumber(sourceMass + residue.Mass);
                    if (sink < 0 || sink >= binCount)
                    {
                        continue;
                    }

                    // Each edge only records the bin it starts from.
                    _adjList[sink].AddLast(new ScoringGraphEdge(source));

                    // Terminal modifications are only attached to unmodified residues leaving bin 0.
                    if (source != 0 || residue is ModifiedAminoAcid)
                    {
                        continue;
                    }

                    foreach (var terminalMod in terminalMods)
                    {
                        var modifiedResidue = new ModifiedAminoAcid(residue, terminalMod);
                        var modifiedSink    = _comparer.GetBinNumber(sourceMass + modifiedResidue.Mass);
                        if (modifiedSink >= 0 && modifiedSink < binCount)
                        {
                            _adjList[modifiedSink].AddLast(new ScoringGraphEdge(source));
                        }
                    }
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Computes the root-mean-square deviation between the binned intensity vectors of
        /// two spectra.
        /// </summary>
        /// <param name="spectrum1">Peaks of the first spectrum.</param>
        /// <param name="spectrum2">Peaks of the second spectrum.</param>
        /// <param name="comparer">Mass binning used to turn peak m/z values into vector indices.</param>
        /// <returns>sqrt(sum of squared per-bin differences / number of bins).</returns>
        public double RootMeanSquareDeviation(Peak[] spectrum1, Peak[] spectrum2, FilteredProteinMassBinning comparer)
        {
            // Both spectra go through the same filter (defined elsewhere) before comparison.
            spectrum1 = GuassianFilter(spectrum1, .5);
            spectrum2 = GuassianFilter(spectrum2, .5);

            // The vector length is the bin number that the binning assigns to mass 10000.
            var vectorLength     = comparer.GetBinNumber(10000.0);
            var intensityVector1 = ConvertToFullIntensityVector(spectrum1, vectorLength, comparer);
            var intensityVector2 = ConvertToFullIntensityVector(spectrum2, vectorLength, comparer);

            // Fix: the two per-spectrum means that used to be computed here were never used
            // (and the second one summed spectrum1 by mistake), so they have been removed.

            var sum = 0d;

            for (var i = 0; i < vectorLength; i++)
            {
                var diff = intensityVector1[i] - intensityVector2[i];
                sum += diff * diff;
            }

            return Math.Sqrt(sum / vectorLength);
        }
예제 #7
0
        /// <summary>
        /// Computes the normalized dot product (cosine similarity) of the binned intensity
        /// vectors of two spectra.
        /// </summary>
        /// <param name="spectrum1">Peaks of the first spectrum.</param>
        /// <param name="spectrum2">Peaks of the second spectrum.</param>
        /// <param name="comparer">Mass binning used to turn peak m/z values into vector indices.</param>
        /// <returns>
        /// dot(v1, v2) / sqrt(|v1|^2 * |v2|^2), or 0 when either vector contains no intensity.
        /// </returns>
        public double DotProduct(Peak[] spectrum1, Peak[] spectrum2, FilteredProteinMassBinning comparer)
        {
            // Both spectra go through the same filter (defined elsewhere) before comparison.
            spectrum1 = GuassianFilter(spectrum1, .5);
            spectrum2 = GuassianFilter(spectrum2, .5);

            // The vector length is the bin number that the binning assigns to mass 10000.
            var vectorLength   = comparer.GetBinNumber(10000.0);
            var featureVector1 = ConvertToFullIntensityVector(spectrum1, vectorLength, comparer);
            var featureVector2 = ConvertToFullIntensityVector(spectrum2, vectorLength, comparer);

            var sum = 0d;

            for (var i = 0; i < vectorLength; i++)
            {
                sum += featureVector1[i] * featureVector2[i];
            }

            var norm1 = featureVector1.Sum(x => x * x);
            var norm2 = featureVector2.Sum(x => x * x);

            // Fix: an all-zero vector used to yield 0 / 0 = NaN. Report "no similarity"
            // instead, matching the zero-variance guard in PearsonCorrelation.
            if (norm1 <= 0 || norm2 <= 0)
            {
                return 0;
            }

            return sum / Math.Sqrt(norm1 * norm2);
        }
예제 #8
0
        /// <summary>
        /// Re-scores every identification listed in the IcTarget and IcDecoy result files of a
        /// fixed local dataset and writes the first 15 columns of each row, followed by the
        /// score and spectral E-value, to a matching "_Rescored.tsv" file.
        /// </summary>
        /// <remarks>The test is ignored when the hard-coded spectrum file does not exist.</remarks>
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            // Alternative (network share) location of the same dataset:
            // \\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SpecFiles\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw
            const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            // Amino acid set (with modifications) used for mass binning and the scoring graph.
            const int maxModsPerProtein = 4;
            var oxidationOnM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroOnC   = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylOnNTerm = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var modifications = new List<SearchModification> { dehydroOnC, oxidationOnM, acetylOnNTerm };

            var binningAaSet = new AminoAcidSet(modifications, maxModsPerProtein);
            var massBinning  = new FilteredProteinMassBinning(binningAaSet, 50000, 28);

            // Scoring and deconvolution parameters.
            const int    lowestCharge           = 1;
            const int    highestCharge          = 20;
            const int    isotopeOffsetTolerance = 2;
            const double filteringWindowSize    = 1.1;

            var lcmsRun             = PbfLcMsRun.GetLcMsRun(rawFilePath);
            var massTolerance       = new Tolerance(10);
            var scoringGraphFactory = new ProteinScoringGraphFactory(massBinning, binningAaSet);
            var plainAaSet          = new AminoAcidSet();
            // A MatchedPeakPostScorer(tolerance, minCharge, maxCharge) was tried here previously.
            var topDownScorer = new InformedTopDownScorer(lcmsRun, plainAaSet, lowestCharge, highestCharge, massTolerance);

            foreach (var ext in new[] { "IcTarget", "IcDecoy" })
            {
                var inputPath = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv", ext);
                var tsv       = new TsvFileParser(inputPath);

                // Pull the needed columns out of the result file once, up front.
                var scanNumbers       = tsv.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
                var chargeStates      = tsv.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
                var sequenceTexts     = tsv.GetData("Sequence").ToArray();
                var modificationTexts = tsv.GetData("Modifications").ToArray();
                var compositions      = tsv.GetData("Composition").Select(Composition.Parse).ToArray();
                var proteinMasses     = tsv.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

                var outputPath = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputPath))
                {
                    var headerPrefix = string.Join("\t", tsv.GetHeaders().ToArray(), 0, 15);
                    writer.WriteLine(headerPrefix + "\tScore\tEValue");

                    // Rows are computed in parallel and written afterwards in file order.
                    var rescoredRows = new string[tsv.NumData];

                    Parallel.For(0, tsv.NumData, row =>
                    {
                        var scanNumber  = scanNumbers[row];
                        var chargeState = chargeStates[row];
                        var sequence    = Sequence.CreateSequence(sequenceTexts[row], modificationTexts[row], plainAaSet);
                        Assert.True(sequence.Composition.Equals(compositions[row] - Composition.H2O));

                        var productSpectrum = lcmsRun.GetSpectrum(scanNumber) as ProductSpectrum;
                        Assert.True(productSpectrum != null);

                        var scoreResult = topDownScorer.GetScores(sequence, chargeState, scanNumber);

                        var deconvoluted = Deconvoluter.GetDeconvolutedSpectrum(
                            productSpectrum, lowestCharge, highestCharge,
                            isotopeOffsetTolerance, filteringWindowSize, massTolerance, 0.7);

                        var nodeScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(
                            deconvoluted, productSpectrum, massTolerance, massBinning);
                        var scoringGraph = scoringGraphFactory.CreateScoringGraph(nodeScorer, proteinMasses[row]);

                        var generatingFunction = new GeneratingFunction(scoringGraph);
                        generatingFunction.ComputeGeneratingFunction();

                        var spectralEValue = generatingFunction.GetSpectralEValue(scoreResult.Score);

                        // Keep the first 15 columns of the source row and append the new values.
                        var columns   = tsv.GetRows()[row].Split('\t');
                        var rowPrefix = string.Join("\t", columns, 0, 15);

                        lock (rescoredRows)
                        {
                            rescoredRows[row] = string.Format("{0}\t{1}\t{2}", rowPrefix, scoreResult.Score, spectralEValue);
                        }
                    });

                    for (var row = 0; row < rescoredRows.Length; row++)
                    {
                        writer.WriteLine(rescoredRows[row]);
                    }
                }

                Console.WriteLine("Done");
            }
        }
예제 #9
0
        /// <summary>
        /// Re-scoring test driven by the IcTarget / IcDecoy result files that sit next to the
        /// PBF test file.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing is cut off inside the parallel loop, so only the setup
        /// and the start of the per-row work are visible here.
        /// </remarks>
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            // Locate the PBF test file through the shared test utilities.
            var pbfFilePath = Utils.GetPbfTestFilePath(false);
            var pbfFile     = Utils.GetTestFile(methodName, pbfFilePath);

            // Configure amino acid set
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List <SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            // The modified amino acid set feeds the mass binning and the scoring graph factory.
            var aaSet    = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var          run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);
            const double filteringWindowSize    = 1.1;
            const int    isotopeOffsetTolerance = 2;
            var          tolerance    = new Tolerance(10);
            const int    minCharge    = 1;
            const int    maxCharge    = 20;
            var          graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            // A second, default amino acid set is used for sequence creation and the top-down scorer.
            var          aminoAcidSet = new AminoAcidSet();
            //var scorer = new MatchedPeakPostScorer(tolerance, minCharge, maxCharge);
            var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            // The result files are looked up in the PBF file's directory, so it must be known.
            if (pbfFile.DirectoryName == null)
            {
                Assert.Ignore("Ignoring test since cannot determine the parent directory of " + pbfFile.FullName);
            }

            var fileExt = new[] { "IcTarget", "IcDecoy" };

            foreach (var ext in fileExt)
            {
                // Input: <pbf name>_<ext>.tsv next to the PBF file.
                var resultFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}.tsv", ext);
                var parser         = new TsvFileParser(resultFileName);
                var scans          = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
                var charges        = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
                var protSequences  = parser.GetData("Sequence").ToArray();
                var modStrs        = parser.GetData("Modifications").ToArray();
                var compositions   = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass       = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

                // Output: <pbf name>_<ext>_Rescored.tsv next to the PBF file.
                var outputFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputFileName))
                {
                    // Header: the first 15 source columns plus the two new columns.
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

                    var lines = new string[parser.NumData];

                    // Only indices 0..29 are processed instead of every parsed row.
                    // NOTE(review): this assumes the result file has at least 30 rows - confirm.
                    //for (var i = 0; i < parser.NumData; i++)
                    Parallel.For(0, 30, i =>
                    {
                        var scan         = scans[i];
                        var charge       = charges[i];
                        var protSequence = protSequences[i];
                        var modStr       = modStrs[i];
                        var sequence     = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                        // Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));

                        // Scans that do not yield a ProductSpectrum are reported on the console.
                        if (!(run.GetSpectrum(scan) is ProductSpectrum ms2Spec))
                        {
                            Console.WriteLine("Could not get the spectrum datafor scan {0}", scan);
                        }
예제 #10
0
        /// <summary>
        /// Builds the scoring graph for one protein sequence against one scan of the PBF test
        /// file, computes its generating function, and prints the score range, the spectral
        /// E-value for every score from 15 up to the maximum score, and timing information.
        /// </summary>
        /// <param name="scanNum">Scan number of the spectrum to score against.</param>
        /// <param name="protSequence">Protein sequence used to compute the target mass.</param>
        public void TestGetScoreDistribution(int scanNum, string protSequence)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var pbfFilePath = Utils.GetPbfTestFilePath(false);
            var pbfFile     = Utils.GetTestFile(methodName, pbfFilePath);

            if (!pbfFile.Exists)
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pbfFile);
            }

            const string modStr = "";

            const int    maxCharge              = 20;
            const int    minCharge              = 1;
            const double filteringWindowSize    = 1.1;
            const int    isotopeOffsetTolerance = 2;
            var          tolerance              = new Tolerance(10);
            var          run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);

            // Configure amino acid set
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List<SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet    = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            // Time the edge precomputation done by the factory constructor.
            var stopwatch    = Stopwatch.StartNew();
            var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);

            stopwatch.Stop();
            Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            // stopwatch2 measures everything from here to the end of the test.
            var stopwatch2 = Stopwatch.StartNew();

            var sequence    = Sequence.CreateSequence(protSequence, modStr, aaSet);
            var proteinMass = sequence.Mass + Composition.H2O.Mass;

            Console.WriteLine("Mass = {0}", proteinMass);

            // Fix: fail with a clear message when the scan is missing or is not a product
            // spectrum, instead of handing null to the deconvoluter and the scorer.
            var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
            if (spectrum == null)
            {
                Assert.Fail(string.Format("Could not get a product spectrum for scan {0} in {1}", scanNum, pbfFile.FullName));
            }

            var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(spectrum, minCharge, maxCharge,
                                                                  isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

            // Time the construction of the scorer and the scoring graph.
            stopwatch.Restart();

            var scorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, spectrum, tolerance, comparer);
            var graph  = graphFactory.CreateScoringGraph(scorer, proteinMass);

            stopwatch.Stop();
            Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            // Time the generating function computation.
            stopwatch.Restart();
            var gf = new GeneratingFunction(graph);

            gf.ComputeGeneratingFunction();
            stopwatch.Stop();
            Console.WriteLine(@"computing generation function = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            var scoreDist = gf.GetScoreDistribution();

            Console.WriteLine("{0}-{1}", scoreDist.MinScore, scoreDist.MaxScore);

            Console.WriteLine("{0} : {1}", "score", "specEValue");

            for (var score = 15; score <= gf.MaximumScore; score++)
            {
                var specEvalue = gf.GetSpectralEValue(score);
                Console.WriteLine("{0} : {1}", score, specEvalue);
            }

            stopwatch2.Stop();
            Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", stopwatch2.ElapsedMilliseconds / 1000.0d);
        }
예제 #11
0
        /// <summary>
        /// Re-scores up to the first 30 identifications of the IcTarget and IcDecoy result
        /// files that sit next to the PBF test file, and prints up to 20 of the re-scored rows
        /// per file to the console.
        /// </summary>
        /// <remarks>
        /// A "_Rescored.tsv" file is created for each result file, but only its header line is
        /// written; the re-scored rows themselves go to the console.
        /// </remarks>
        public void TestCompositeScoring()
        {
            // Upper limit on the number of result rows that are re-scored per file.
            const int maxRowsToScore = 30;

            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var pbfFilePath = Utils.GetPbfTestFilePath(false);
            var pbfFile     = Utils.GetTestFile(methodName, pbfFilePath);

            // Configure amino acid set
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List<SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet    = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var          run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);
            const double filteringWindowSize    = 1.1;
            const int    isotopeOffsetTolerance = 2;
            var          tolerance    = new Tolerance(10);
            const int    minCharge    = 1;
            const int    maxCharge    = 20;
            var          graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            // A second, default amino acid set is used for sequence creation and the top-down scorer.
            var          aminoAcidSet = new AminoAcidSet();
            var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            // The result files are looked up in the PBF file's directory, so it must be known.
            if (pbfFile.DirectoryName == null)
            {
                Assert.Ignore("Ignoring test since cannot determine the parent directory of " + pbfFile.FullName);
            }

            var fileExt = new[] { "IcTarget", "IcDecoy" };

            foreach (var ext in fileExt)
            {
                var resultFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}.tsv", ext);
                var parser         = new TsvFileParser(resultFileName);
                var scans          = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
                var charges        = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
                var protSequences  = parser.GetData("Sequence").ToArray();
                var modStrs        = parser.GetData("Modifications").ToArray();
                // Only referenced by the disabled composition assertion below; kept so that the
                // Composition column is still parsed exactly as before.
                var compositions   = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass       = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

                var outputFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputFileName))
                {
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

                    var lines = new string[parser.NumData];

                    // Fix: never index past the parsed rows. The loop used to run a fixed 30
                    // iterations and threw IndexOutOfRangeException on shorter result files.
                    var rowsToScore = Math.Min(maxRowsToScore, parser.NumData);

                    Parallel.For(0, rowsToScore, i =>
                    {
                        var scan         = scans[i];
                        var charge       = charges[i];
                        var protSequence = protSequences[i];
                        var modStr       = modStrs[i];
                        var sequence     = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                        // Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));
                        var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;

                        if (ms2Spec == null)
                        {
                            // Skip this row; its slot in 'lines' stays null and is filtered out below.
                            Console.WriteLine("Could not get the spectrum datafor scan {0}", scan);
                            return;
                        }

                        var scores = scorer.GetScores(sequence, charge, scan);

                        var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge,
                                                                              isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

                        var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance,
                                                                                          comparer);
                        var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                        var gf = new GeneratingFunction(graph);
                        gf.ComputeGeneratingFunction();

                        var specEvalue = gf.GetSpectralEValue(scores.Score);

                        // Keep the first 15 columns of the source row and append the new values.
                        var rowStr    = parser.GetRows()[i];
                        var items     = rowStr.Split('\t');
                        var newRowStr = string.Join("\t", items, 0, 15);

                        // Each iteration writes only its own slot, so no locking is needed.
                        lines[i] = string.Format("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
                    });

                    // Show at most 20 of the rows that were actually re-scored.
                    foreach (var line in lines.Where(item => !string.IsNullOrWhiteSpace(item)).Take(20))
                    {
                        Console.WriteLine(line);
                    }
                }
                Console.WriteLine("Done");
            }
        }
예제 #12
0
        /// <summary>
        /// Builds the scoring graph for a fixed protein sequence against scan 5927 of a fixed
        /// local dataset, computes its generating function, and prints the score range, the
        /// spectral E-value for every score from 45 up to the maximum score, and timing
        /// information.
        /// </summary>
        /// <remarks>
        /// The test is ignored when either the identification file or the spectrum file is
        /// not present at its hard-coded location.
        /// </remarks>
        public void TestGetScoreDistribution()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);
            const string rawFile      = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
            const string idFileFolder = @"D:\MassSpecFiles\training\IdScoring\MSPF_trainset";

            const int    scanNum      = 5927;
            const string protSequence = "MNKSELIEKIASGADISKAAAGRALDSFIAAVTEGLKEGDKISLVGFGTFEVRERAERTGRNPQTGEEIKIAAAKIPAFKAGKALKDAVN";

            const string modStr = "";

            var idFile = string.Format(@"{0}\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv", idFileFolder);

            // Fix: report the test as ignored when the id file is missing. A bare return made
            // the test count as passed even though nothing had been exercised.
            if (!File.Exists(idFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, idFile);
            }

            if (!File.Exists(rawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
            }

            const int    maxCharge              = 20;
            const int    minCharge              = 1;
            const double filteringWindowSize    = 1.1;
            const int    isotopeOffsetTolerance = 2;
            var          tolerance              = new Tolerance(10);
            var          run = PbfLcMsRun.GetLcMsRun(rawFile);

            // Configure amino acid set
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List<SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet    = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            // Time the edge precomputation done by the factory constructor.
            var stopwatch    = Stopwatch.StartNew();
            var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);

            stopwatch.Stop();
            Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            // stopwatch2 measures everything from here to the end of the test.
            var stopwatch2 = Stopwatch.StartNew();

            var sequence    = Sequence.CreateSequence(protSequence, modStr, aaSet);
            var proteinMass = sequence.Mass + Composition.H2O.Mass;

            Console.WriteLine("Mass = {0}", proteinMass);

            // Fix: fail with a clear message when the scan is missing or is not a product
            // spectrum, instead of handing null to the deconvoluter and the scorer.
            var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
            if (spectrum == null)
            {
                Assert.Fail(string.Format("Could not get a product spectrum for scan {0} in {1}", scanNum, rawFile));
            }

            var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(spectrum, minCharge, maxCharge,
                                                                  isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

            // Time the construction of the scorer and the scoring graph.
            stopwatch.Restart();

            var scorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, spectrum, tolerance, comparer);
            var graph  = graphFactory.CreateScoringGraph(scorer, proteinMass);

            stopwatch.Stop();
            Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            // Time the generating function computation.
            stopwatch.Restart();
            var gf = new GeneratingFunction(graph);

            gf.ComputeGeneratingFunction();
            stopwatch.Stop();
            Console.WriteLine(@"computing generation function = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            var scoreDist = gf.GetScoreDistribution();

            Console.WriteLine("{0}-{1}", scoreDist.MinScore, scoreDist.MaxScore);

            for (var score = 45; score <= gf.MaximumScore; score++)
            {
                var specEvalue = gf.GetSpectralEValue(score);
                Console.WriteLine("{0} : {1}", score, specEvalue);
            }

            stopwatch2.Stop();
            Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", (stopwatch2.ElapsedMilliseconds) / 1000.0d);
        }