Exemple #1
0
        /// <summary>
        /// Deconvolutes a spectrum and builds a new spectrum that holds at most one peak per mass bin.
        /// </summary>
        /// <param name="spec">Spectrum to deconvolute</param>
        /// <param name="minCharge">Minimum charge passed to the deconvoluter</param>
        /// <param name="maxCharge">Maximum charge passed to the deconvoluter</param>
        /// <param name="tolerance">Tolerance passed to the deconvoluter</param>
        /// <param name="corrThreshold">Correlation threshold passed to the deconvoluter</param>
        /// <param name="isotopeOffsetTolerance">Isotope offset tolerance passed to the deconvoluter</param>
        /// <param name="filteringWindowSize">Filtering window size passed to the deconvoluter</param>
        /// <returns>
        /// A <see cref="ProductSpectrum"/> that copies the MS level, activation method and isolation window of
        /// <paramref name="spec"/> when it is a product spectrum; otherwise a plain <see cref="Spectrum"/>.
        /// Both carry the scan number of <paramref name="spec"/>.
        /// </returns>
        public static Spectrum GetDeconvolutedSpectrum(Spectrum spec, int minCharge, int maxCharge, Tolerance tolerance, double corrThreshold,
                                                       int isotopeOffsetTolerance, double filteringWindowSize = 1.1)
        {
            var candidates  = Deconvoluter.GetDeconvolutedPeaks(spec, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, corrThreshold);
            var uniquePeaks = new List<Peak>();
            var takenBins   = new HashSet<int>();

            foreach (var candidate in candidates)
            {
                var peakMass = candidate.Mass;

                // HashSet.Add returns false for a bin that is already taken,
                // so only the first peak encountered in each bin is kept.
                if (takenBins.Add(GetBinNumber(peakMass)))
                {
                    uniquePeaks.Add(new Peak(peakMass, candidate.Intensity));
                }
            }

            var sourceProductSpec = spec as ProductSpectrum;
            if (sourceProductSpec == null)
            {
                return new Spectrum(uniquePeaks, spec.ScanNum);
            }

            return new ProductSpectrum(uniquePeaks, spec.ScanNum)
            {
                MsLevel = spec.MsLevel,
                ActivationMethod = sourceProductSpec.ActivationMethod,
                IsolationWindow = sourceProductSpec.IsolationWindow
            };
        }
        /// <summary>
        /// Reads the spectrum of a scan and returns its combined deconvoluted spectrum.
        /// </summary>
        /// <param name="scan">Scan number to read</param>
        /// <param name="pbfLcMsRun">Run the spectrum is read from</param>
        /// <returns>The deconvoluted spectrum, or null when the scan does not yield a <see cref="ProductSpectrum"/></returns>
        private DeconvolutedSpectrum GetDeconvolutedSpectrum(int scan, PbfLcMsRun pbfLcMsRun)
        {
            var productSpectrum = pbfLcMsRun.GetSpectrum(scan) as ProductSpectrum;
            if (productSpectrum != null)
            {
                var ppmTolerance = new Tolerance(10, ToleranceUnit.Ppm);
                return Deconvoluter.GetCombinedDeconvolutedSpectrum(productSpectrum, 1, 20, 2, ppmTolerance, 0.7);
            }

            // Not a product spectrum (or no spectrum at all): nothing to deconvolute.
            return null;
        }
        /// <summary>
        /// Creates a scorer for the given scan, based on its deconvoluted product spectrum.
        /// </summary>
        /// <param name="scanNum">Scan number to score against</param>
        /// <returns>
        /// A <see cref="CompositeScorerBasedOnDeconvolutedSpectrum"/>, or null when the scan is not a
        /// <see cref="ProductSpectrum"/> or the deconvoluter returns null
        /// </returns>
        public IScorer GetScorer(int scanNum)
        {
            var productSpectrum = _run.GetSpectrum(scanNum) as ProductSpectrum;
            if (productSpectrum == null)
            {
                return null;
            }

            var deconvoluted = Deconvoluter.GetDeconvolutedSpectrum(
                productSpectrum, _minProductCharge, _maxProductCharge, IsotopeOffsetTolerance, FilteringWindowSize, _productTolerance);
            if (deconvoluted == null)
            {
                return null;
            }

            return new CompositeScorerBasedOnDeconvolutedSpectrum(deconvoluted, productSpectrum, _productTolerance, _comparer);
        }
Exemple #4
0
 /// <summary>
 /// Creates a scorer for the given scan, based on its deconvoluted product spectrum.
 /// </summary>
 /// <param name="scanNum">Scan number to score against</param>
 /// <returns>
 /// A <see cref="CompositeScorerBasedOnDeconvolutedSpectrum"/>, or null when the scan is not a
 /// <see cref="ProductSpectrum"/> or the deconvoluter returns null
 /// </returns>
 /// <exception cref="Exception">Wraps any exception raised while building the scorer, adding the scan number to the message</exception>
 public IScorer GetScorer(int scanNum)
 {
     try
     {
         var spec = _run.GetSpectrum(scanNum) as ProductSpectrum;
         if (spec == null)
         {
             return null;
         }

         var deconvolutedSpec = Deconvoluter.GetDeconvolutedSpectrum(
             spec, _minProductCharge, _maxProductCharge, IsotopeOffsetTolerance, FilteringWindowSize, _productTolerance);
         if (deconvolutedSpec == null)
         {
             return null;
         }

         return new CompositeScorerBasedOnDeconvolutedSpectrum(deconvolutedSpec, spec, _productTolerance, _comparer);
     }
     catch (Exception ex)
     {
         // Keep the original exception as InnerException so the root cause stays visible.
         var message = string.Format("Error getting the scorer for scan {0} in GetScorer: {1}", scanNum, ex.Message);
         throw new Exception(message, ex);
     }
 }
Exemple #5
0
        /// <summary>
        /// Sets up a sequence tag finder for one product spectrum: records the amino acid mass range,
        /// deconvolutes the spectrum peaks, sizes the graph to the number of deconvoluted peaks
        /// and collects the sequence tag graph edges.
        /// </summary>
        /// <param name="spec">Product spectrum to search for tags</param>
        /// <param name="tolerance">Tolerance stored for later use and passed to the deconvoluter</param>
        /// <param name="minTagLength">Minimum tag length</param>
        /// <param name="maxTagLength">Maximum tag length (forwarded to the base constructor)</param>
        /// <param name="aminoAcidsArray">Amino acids to use; the standard amino acid array is used when null</param>
        /// <exception cref="Exception">Thrown when the amino acid array has more than 256 entries</exception>
        public SequenceTagFinder(ProductSpectrum spec, Tolerance tolerance, int minTagLength = 5, int maxTagLength = 8, AminoAcid[] aminoAcidsArray = null)
            : base(maxTagLength)
        {
            var baseIonTypes   = spec.ActivationMethod == ActivationMethod.ETD ? BaseIonTypesETD : BaseIonTypesCID;
            var neutralLosses  = new List<NeutralLoss> { NeutralLoss.NoLoss };
            var ionTypeFactory = new IonTypeFactory(baseIonTypes, neutralLosses, MaxCharge);

            // ReSharper disable once UnusedVariable
            // This call is used to validate the ion types returned by ionTypeFactory
            var _ionTypes = ionTypeFactory.GetAllKnownIonTypes().ToArray();

            _tolerance       = tolerance;
            _aminoAcidsArray = aminoAcidsArray ?? AminoAcid.StandardAminoAcidArr;

            if (_aminoAcidsArray.Length - 1 > Byte.MaxValue)
            {
                throw new Exception("Too many amino acid types");
            }

            // Scan the amino acids once to find the heaviest and lightest residue mass.
            var heaviest = 0d;
            var lightest = 10E4;
            foreach (var aminoAcid in _aminoAcidsArray)
            {
                var residueMass = aminoAcid.Composition.Mass;
                if (residueMass > heaviest)
                {
                    heaviest = residueMass;
                }

                if (residueMass < lightest)
                {
                    lightest = residueMass;
                }
            }

            _maxAminoAcidMass = heaviest;
            _minAminoAcidMass = lightest;
            _minTagLength     = minTagLength;

            _spectrum          = spec;
            _deconvolutedPeaks = Deconvoluter.GetDeconvolutedPeaks(_spectrum.ScanNum, _spectrum.Peaks, MinCharge, MaxCharge, IsotopeOffsetTolerance, 1.1, _tolerance, 0.7);

            // One graph node per deconvoluted peak; edges are collected right after sizing.
            SetNodeCount(_deconvolutedPeaks.Count);
            CollectSequenceTagGraphEdges();

            _seqTagSet                = new HashSet<SequenceTag>();
            NumberOfProcessedPaths    = 0;
            MaxNumberOfProcessedPaths = 1024;
        }
Exemple #6
0
        /// <summary>
        /// Counts the scans returned by <c>GetScanNumbers(2)</c> whose product spectrum has at least 50 peaks
        /// and an isolation window with a known charge, then prints the count to the console.
        /// </summary>
        public void CountScansTest()
        {
            var file         = @"C:\Users\wilk011\Documents\DataFiles\MSPF\Ecoli_Ribosome\Ecoli_intact_UVPD-3pulse0p5mJ_05-20-2017.pbf";
            var pbfLcmsRun   = PbfLcMsRun.GetLcMsRun(file);
            var deconvoluter = new Deconvoluter(1, 20, 2, 0.1, new Tolerance(10, ToleranceUnit.Ppm));
            var lcmsRunDecon = new LcmsRunDeconvoluter(pbfLcmsRun, deconvoluter, 2, 6);

            // NOTE(review): dlcms is never read below; it is kept because constructing it may have
            // side effects this test relies on - confirm before removing.
            var dlcms        = new DPbfLcMsRun(file, lcmsRunDecon, keepDataReaderOpen: true);
            int count        = 0;
            var scans        = pbfLcmsRun.GetScanNumbers(2);

            foreach (var scan in scans)
            {
                var spectrum = pbfLcmsRun.GetSpectrum(scan) as ProductSpectrum;

                // The 'as' cast yields null when the scan is not a ProductSpectrum; skip such scans
                // instead of failing with a NullReferenceException.
                if (spectrum == null || spectrum.Peaks.Length < 50 || spectrum.IsolationWindow.Charge == null)
                {
                    continue;
                }

                count++;
            }
            Console.WriteLine(count);
        }
Exemple #7
0
        /// <summary>
        /// Rescores the "IcTarget" and "IcDecoy" result files of the QC_Shew intact dataset: for every row it
        /// computes the scores of the identified sequence and a spectral E-value from a generating function,
        /// and writes the first 15 columns of the row plus score and E-value to a "_Rescored.tsv" file.
        /// The test is ignored when the pbf file is not present.
        /// </summary>
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            // Amino acid set used for mass binning and for the scoring graph
            const int numMaxModsPerProtein = 4;
            var searchModifications = new List<SearchModification>
            {
                new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false)
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            const double filteringWindowSize = 1.1;
            const int isotopeOffsetTolerance = 2;
            const int minCharge = 1;
            const int maxCharge = 20;

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
            var tolerance = new Tolerance(10);
            var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            var aminoAcidSet = new AminoAcidSet();
            var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            foreach (var ext in new[] { "IcTarget", "IcDecoy" })
            {
                var resultFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv", ext);
                var parser = new TsvFileParser(resultFileName);

                // Column-wise view of the result file; index i addresses the same row in every array
                var scans = parser.GetData("Scan").Select(text => Convert.ToInt32(text)).ToArray();
                var charges = parser.GetData("Charge").Select(text => Convert.ToInt32(text)).ToArray();
                var protSequences = parser.GetData("Sequence").ToArray();
                var modStrs = parser.GetData("Modifications").ToArray();
                var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass = parser.GetData("Mass").Select(text => Convert.ToDouble(text)).ToArray();

                var outputFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputFileName))
                {
                    var header = string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue";
                    writer.WriteLine(header);

                    // Rows are computed in parallel into their own slot and written afterwards,
                    // so the output keeps the row order of the input file.
                    var lines = new string[parser.NumData];

                    Parallel.For(0, parser.NumData, row =>
                    {
                        var scanNum = scans[row];
                        var chargeState = charges[row];
                        var proteinSequence = protSequences[row];
                        var modifications = modStrs[row];

                        var sequence = Sequence.CreateSequence(proteinSequence, modifications, aminoAcidSet);
                        Assert.True(sequence.Composition.Equals(compositions[row] - Composition.H2O));

                        var ms2Spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                        Assert.True(ms2Spec != null);

                        var scores = scorer.GetScores(sequence, chargeState, scanNum);

                        var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(
                            ms2Spec, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);
                        var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance, comparer);
                        var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[row]);

                        var generatingFunction = new GeneratingFunction(graph);
                        generatingFunction.ComputeGeneratingFunction();
                        var specEvalue = generatingFunction.GetSpectralEValue(scores.Score);

                        // Keep the first 15 columns of the original row and append the new values
                        var columns = parser.GetRows()[row].Split('\t').ToArray();
                        var leadingColumns = string.Join("\t", columns, 0, 15);

                        lock (lines)
                        {
                            lines[row] = string.Format("{0}\t{1}\t{2}", leadingColumns, scores.Score, specEvalue);
                        }
                    });

                    for (var row = 0; row < lines.Length; row++)
                    {
                        writer.WriteLine(lines[row]);
                    }
                }
                Console.WriteLine("Done");
            }
        }
Exemple #8
0
        /// <summary>
        /// Get correctly filtered and/or de-convoluted spectrum
        /// </summary>
        /// <remarks>
        /// The filtered, de-convoluted and filtered + de-convoluted variants are each computed on first
        /// use and cached in their own field. When only the top 20 peaks are requested, the 20 most
        /// intense peaks of the selected variant are returned, ordered by m/z.
        /// </remarks>
        /// <returns>Filtered and/or de-convoluted spectrum</returns>
        private Spectrum GetSpectrum()
        {
            var currentSpectrum = this.Spectrum;

            // Product spectra use the product ion tolerance; everything else uses the precursor tolerance.
            var tolerance       = (currentSpectrum is ProductSpectrum)
                                ? IcParameters.Instance.ProductIonTolerancePpm
                                : IcParameters.Instance.PrecursorTolerancePpm;

            if (this.ShowFilteredSpectrum && this.ShowDeconvolutedSpectrum)
            {
                if (this.filteredDeconvolutedSpectrum == null)
                {
                    // Filter a copy first, then de-convolute the filtered copy. Previously the
                    // de-convolution ran on the unfiltered spectrum and was stored in
                    // deconvolutedSpectrum, so this branch returned a spectrum that was only filtered.
                    var filtered = new Spectrum(currentSpectrum.Peaks, currentSpectrum.ScanNum);
                    filtered.FilterNosieByIntensityHistogram();
                    this.filteredDeconvolutedSpectrum = Deconvoluter.GetCombinedDeconvolutedSpectrum(
                        filtered,
                        Constants.MinCharge,
                        Constants.MaxCharge,
                        Constants.IsotopeOffsetTolerance,
                        tolerance,
                        IcParameters.Instance.IonCorrelationThreshold);
                }

                currentSpectrum = this.filteredDeconvolutedSpectrum;
            }
            else if (this.ShowFilteredSpectrum)
            {
                if (this.filteredSpectrum == null)
                {
                    // Filter a copy so the original spectrum keeps all of its peaks.
                    this.filteredSpectrum = new Spectrum(currentSpectrum.Peaks, currentSpectrum.ScanNum);
                    this.filteredSpectrum.FilterNosieByIntensityHistogram();
                }

                currentSpectrum = this.filteredSpectrum;
            }
            else if (this.ShowDeconvolutedSpectrum)
            {
                if (this.deconvolutedSpectrum == null)
                {
                    this.deconvolutedSpectrum = Deconvoluter.GetCombinedDeconvolutedSpectrum(
                        currentSpectrum,
                        Constants.MinCharge,
                        Constants.MaxCharge,
                        Constants.IsotopeOffsetTolerance,
                        tolerance,
                        IcParameters.Instance.IonCorrelationThreshold);
                }

                currentSpectrum = this.deconvolutedSpectrum;
            }

            if (this.ShowOnlyTop20Peaks)
            {
                // Pick the 20 most intense peaks, then restore m/z order for the new spectrum.
                var top20Peaks = currentSpectrum.Peaks.OrderByDescending(p => p.Intensity).Take(20).OrderBy(p => p.Mz).ToList();
                currentSpectrum = new Spectrum(top20Peaks, currentSpectrum.ScanNum);
            }

            return currentSpectrum;
        }
Exemple #9
0
        }                                                         // true: target and decoy, false: target only, null: decoy only

        /// <summary>
        /// Quick protein identification experiment. First builds, for every fragment mass bin between
        /// 200 and 50000, a bit vector marking the MS2 scans that contain a deconvoluted peak in that bin
        /// (within tolerance). Then, for every intact protein in the FASTA database (with and without one
        /// N-terminal cleavage), counts per scan how many fragment compositions plus the Y ion offset hit a
        /// marked bin, and prints the best-scoring protein for each MS2 scan.
        /// </summary>
        public void QuickId()
        {
            const string rawFilePath   = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
            const string modFilePath   = @"H:\Research\QCShew_TopDown\Production\Mods.txt";
            const int    numBits       = 29; // max error: 4ppm
            const int    minCharge     = 1;
            const int    maxCharge     = 20;
            var          tolerance     = new Tolerance(10);
            const double corrThreshold = 0.7;

            var          comparer        = new MzComparerWithBinning(numBits);
            const double minFragmentMass = 200.0;
            const double maxFragmentMass = 50000.0;
            var          minFragMassBin  = comparer.GetBinNumber(minFragmentMass);
            var          maxFragMassBin  = comparer.GetBinNumber(maxFragmentMass);

            var aminoAcidSet = new AminoAcidSet(modFilePath);

            var run           = PbfLcMsRun.GetLcMsRun(rawFilePath);
            var ms2ScanNumArr = run.GetScanNumbers(2).ToArray();

            var sw = new Stopwatch();

            sw.Start();
            Console.Write("Building Spectrum Arrays...");

            // One bit vector per mass bin, indexed by scan number
            var massVectors = new BitArray[maxFragMassBin - minFragMassBin + 1];

            for (var i = minFragMassBin; i <= maxFragMassBin; i++)
            {
                massVectors[i - minFragMassBin] = new BitArray(run.MaxLcScan + 1);
            }

            foreach (var ms2ScanNum in ms2ScanNumArr)
            {
                var productSpec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                if (productSpec == null)
                {
                    continue;
                }

                var deconvolutedPeaks = Deconvoluter.GetDeconvolutedPeaks(productSpec.Peaks, minCharge, maxCharge, 2, 1.1, tolerance, corrThreshold);

                if (deconvolutedPeaks == null)
                {
                    continue;
                }

                foreach (var p in deconvolutedPeaks)
                {
                    // Mark every bin that overlaps [mass - tolerance, mass + tolerance]
                    var mass      = p.Mass;
                    var deltaMass = tolerance.GetToleranceAsDa(mass, 1);
                    var minMass   = mass - deltaMass;
                    var maxMass   = mass + deltaMass;

                    var minBinNum = comparer.GetBinNumber(minMass);
                    var maxBinNum = comparer.GetBinNumber(maxMass);
                    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
                    {
                        if (binNum >= minFragMassBin && binNum <= maxFragMassBin)
                        {
                            massVectors[binNum - minFragMassBin][ms2ScanNum] = true;
                        }
                    }
                }
            }
            sw.Stop();
            Console.WriteLine(@"{0:f1} sec.", sw.Elapsed.TotalSeconds);

            sw.Reset();
            sw.Start();
            var fastaDb = new FastaDatabase(fastaFilePath);

            fastaDb.Read();
            var indexedDb   = new IndexedDatabase(fastaDb);
            var numProteins = 0;
            var intactProteinAnnotationAndOffsets =
                indexedDb.IntactSequenceAnnotationsAndOffsets(0, int.MaxValue);

            // Best protein and its score per scan number
            var bestProtein = new string[run.MaxLcScan + 1];
            var bestScore   = new int[run.MaxLcScan + 1];

            foreach (var annotationAndOffset in intactProteinAnnotationAndOffsets)
            {
                if (++numProteins % 10 == 0)
                {
                    Console.WriteLine(@"Processing, {0} proteins done, {1:f1} sec elapsed",
                                      numProteins,
                                      sw.Elapsed.TotalSeconds);
                }
                var annotation = annotationAndOffset.Annotation;
                var offset     = annotationAndOffset.Offset;

                // Drop the first two and last two characters of the annotation to get the sequence
                var protSequence = annotation.Substring(2, annotation.Length - 4);

                // suffix
                var seqGraph = SequenceGraph.CreateGraph(aminoAcidSet, AminoAcid.ProteinNTerm, protSequence,
                                                         AminoAcid.ProteinCTerm);
                if (seqGraph == null)
                {
                    continue;
                }

                for (var numNTermCleavage = 0; numNTermCleavage <= 1; numNTermCleavage++)
                {
                    if (numNTermCleavage > 0)
                    {
                        seqGraph.CleaveNTerm();
                    }
                    var allCompositions = seqGraph.GetAllFragmentNodeCompositions();

                    var scoreArr = new int[run.MaxLcScan + 1];
                    foreach (var fragComp in allCompositions)
                    {
                        var suffixMass = fragComp.Mass + BaseIonType.Y.OffsetComposition.Mass;
                        var binNum     = comparer.GetBinNumber(suffixMass);
                        if (binNum < minFragMassBin || binNum > maxFragMassBin)
                        {
                            continue;
                        }

                        var vector = massVectors[binNum - minFragMassBin];
                        foreach (var ms2ScanNum in ms2ScanNumArr)
                        {
                            if (vector[ms2ScanNum])
                            {
                                ++scoreArr[ms2ScanNum];
                            }
                        }
                    }
                    foreach (var ms2ScanNum in ms2ScanNumArr)
                    {
                        if (scoreArr[ms2ScanNum] > bestScore[ms2ScanNum])
                        {
                            bestScore[ms2ScanNum] = scoreArr[ms2ScanNum];
                            var proteinName = fastaDb.GetProteinName(offset);

                            // A trailing apostrophe marks the N-terminally cleaved form
                            bestProtein[ms2ScanNum] = proteinName + (numNTermCleavage == 1 ? "'" : "");
                        }
                    }
                }
                // prefix
            }

            Console.WriteLine("ScanNum\tBestProtein\tScore");
            foreach (var ms2ScanNum in ms2ScanNumArr)
            {
                // Column order must follow the header above: scan, protein, score
                // (score and protein were previously swapped).
                Console.WriteLine("{0}\t{1}\t{2}", ms2ScanNum, bestProtein[ms2ScanNum] ?? "", bestScore[ms2ScanNum]);
            }
        }
Exemple #10
0
        /// <summary>
        /// Builds the scoring graph for one scan / protein sequence pair, computes its generating function and
        /// prints the score range plus the spectral E-value for every score from 15 up to the maximum score.
        /// Elapsed times of the individual stages are printed along the way. The test is ignored when the pbf
        /// test file is not found.
        /// </summary>
        /// <param name="scanNum">Scan number of the spectrum to score</param>
        /// <param name="protSequence">Protein sequence (no modifications are applied)</param>
        public void TestGetScoreDistribution(int scanNum, string protSequence)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(methodName);

            var pbfFile = Utils.GetTestFile(methodName, Utils.GetPbfTestFilePath(false));
            if (!pbfFile.Exists)
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pbfFile);
            }

            const string modStr = "";
            const int minCharge = 1;
            const int maxCharge = 20;
            const int isotopeOffsetTolerance = 2;
            const double filteringWindowSize = 1.1;

            var tolerance = new Tolerance(10);
            var run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);

            // Amino acid set used for mass binning and for the scoring graph
            const int numMaxModsPerProtein = 4;
            var searchModifications = new List<SearchModification>
            {
                new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false)
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            // Stage 1: graph factory construction
            var stageTimer = Stopwatch.StartNew();
            var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            stageTimer.Stop();
            Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", (stageTimer.ElapsedMilliseconds) / 1000.0d);

            // The total timer covers everything from here to the end of the method
            var totalTimer = Stopwatch.StartNew();

            var sequence = Sequence.CreateSequence(protSequence, modStr, aaSet);
            var proteinMass = sequence.Mass + Composition.H2O.Mass;
            Console.WriteLine("Mass = {0}", proteinMass);

            var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
            var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(
                spectrum, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

            // Stage 2: scorer and scoring graph
            stageTimer.Reset();
            stageTimer.Start();
            var scorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, spectrum, tolerance, comparer);
            var graph = graphFactory.CreateScoringGraph(scorer, proteinMass);
            stageTimer.Stop();
            Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", (stageTimer.ElapsedMilliseconds) / 1000.0d);

            // Stage 3: generating function
            stageTimer.Restart();
            var gf = new GeneratingFunction(graph);
            gf.ComputeGeneratingFunction();
            stageTimer.Stop();
            Console.WriteLine(@"computing generation function = {0:0.000} sec", (stageTimer.ElapsedMilliseconds) / 1000.0d);

            var scoreDist = gf.GetScoreDistribution();
            Console.WriteLine("{0}-{1}", scoreDist.MinScore, scoreDist.MaxScore);
            Console.WriteLine("{0} : {1}", "score", "specEValue");

            var score = 15;
            while (score <= gf.MaximumScore)
            {
                Console.WriteLine("{0} : {1}", score, gf.GetSpectralEValue(score));
                score++;
            }

            totalTimer.Stop();
            Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", totalTimer.ElapsedMilliseconds / 1000.0d);
        }
Exemple #11
0
        /// <summary>
        /// Rescores up to the first 30 rows of the "IcTarget" and "IcDecoy" result files that sit next to the pbf
        /// test file: for every row it computes the scores of the identified sequence and a spectral E-value from
        /// a generating function. The header is written to a "_Rescored.tsv" file and up to 20 of the rescored
        /// rows are printed to the console. The test is ignored when the pbf test file is not found or its
        /// directory cannot be determined.
        /// </summary>
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var pbfFilePath = Utils.GetPbfTestFilePath(false);
            var pbfFile     = Utils.GetTestFile(methodName, pbfFilePath);

            // Skip instead of failing inside GetLcMsRun when the test data is not available
            if (!pbfFile.Exists)
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pbfFile);
            }

            // Configure amino acid set
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List <SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet    = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var          run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);
            const double filteringWindowSize    = 1.1;
            const int    isotopeOffsetTolerance = 2;
            var          tolerance    = new Tolerance(10);
            const int    minCharge    = 1;
            const int    maxCharge    = 20;
            var          graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            var          aminoAcidSet = new AminoAcidSet();
            var          scorer       = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            if (pbfFile.DirectoryName == null)
            {
                Assert.Ignore("Ignoring test since cannot determine the parent directory of " + pbfFile.FullName);
            }

            // Upper limit on the number of rows rescored per result file
            const int maxRowsToRescore = 30;

            var fileExt = new string[] { "IcTarget", "IcDecoy" };

            foreach (var ext in fileExt)
            {
                var resultFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}.tsv", ext);
                var parser         = new TsvFileParser(resultFileName);
                var scans          = parser.GetData("Scan").Select(s => Convert.ToInt32((string)s)).ToArray();
                var charges        = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
                var protSequences  = parser.GetData("Sequence").ToArray();
                var modStrs        = parser.GetData("Modifications").ToArray();

                // Not read below (the composition assert is disabled), but kept so that every
                // "Composition" value is still parsed.
                var compositions   = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass       = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

                var outputFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputFileName))
                {
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

                    var lines = new string[parser.NumData];

                    // Never index past the parsed rows: a result file may hold fewer than 30 rows
                    var rowsToRescore = Math.Min(maxRowsToRescore, parser.NumData);

                    Parallel.For(0, rowsToRescore, i =>
                    {
                        var scan         = scans[i];
                        var charge       = charges[i];
                        var protSequence = protSequences[i];
                        var modStr       = modStrs[i];
                        var sequence     = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                        var ms2Spec      = run.GetSpectrum(scan) as ProductSpectrum;

                        if (ms2Spec == null)
                        {
                            // Leave lines[i] empty; empty entries are filtered out before printing
                            Console.WriteLine("Could not get the spectrum datafor scan {0}", scan);
                        }
                        else
                        {
                            var scores = scorer.GetScores(sequence, charge, scan);

                            var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge,
                                                                                  isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

                            var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance,
                                                                                              comparer);
                            var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                            var gf = new GeneratingFunction(graph);
                            gf.ComputeGeneratingFunction();

                            var specEvalue = gf.GetSpectralEValue(scores.Score);

                            // Keep the first 15 columns of the original row and append the new values
                            var rowStr    = parser.GetRows()[i];
                            var items     = rowStr.Split('\t').ToArray();
                            var newRowStr = string.Join("\t", items, 0, 15);

                            // Each iteration writes only its own slot, so no locking is needed
                            lines[i] = string.Format("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
                        }
                    });

                    // Only the header goes to the output file; rescored rows are shown on the console
                    foreach (var line in (from item in lines where !string.IsNullOrWhiteSpace(item) select item).Take(20))
                    {
                        Console.WriteLine(line);
                    }
                }
                Console.WriteLine("Done");
            }
        }
Exemple #12
0
        /// <summary>
        /// Builds a protein scoring graph for one fixed sequence against the deconvoluted
        /// spectrum of scan 5927, computes its generating function, and prints the resulting
        /// score range, the spectral E-value for every integer score from 45 up to
        /// <c>gf.MaximumScore</c>, and the elapsed time of each stage.
        /// </summary>
        /// <remarks>
        /// The test is reported as ignored when either hard-coded data file is missing, and it
        /// fails with an explicit message when the requested scan cannot be read as a
        /// <c>ProductSpectrum</c>. Apart from those precondition checks it only writes to the
        /// console; it does not assert on the computed values.
        /// </remarks>
        public void TestGetScoreDistribution()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);
            const string rawFile      = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
            const string idFileFolder = @"D:\MassSpecFiles\training\IdScoring\MSPF_trainset";

            const int    scanNum      = 5927;
            const string protSequence = "MNKSELIEKIASGADISKAAAGRALDSFIAAVTEGLKEGDKISLVGFGTFEVRERAERTGRNPQTGEEIKIAAAKIPAFKAGKALKDAVN";

            const string modStr = "";

            var idFile = Path.Combine(idFileFolder, "QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv");

            // The ID file is only checked for existence here (it is never read below), but a
            // missing file must still show up as a skipped test rather than a silent pass,
            // matching the handling of the raw file.
            if (!File.Exists(idFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, idFile);
            }

            if (!File.Exists(rawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
            }

            const int    maxCharge              = 20;
            const int    minCharge              = 1;
            const double filteringWindowSize    = 1.1;
            const int    isotopeOffsetTolerance = 2;
            var          tolerance              = new Tolerance(10);
            var          run = PbfLcMsRun.GetLcMsRun(rawFile);

            // Configure amino acid set
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List <SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet    = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            // Stage 1: time the construction of the graph factory.
            var stopwatch    = Stopwatch.StartNew();
            var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);

            stopwatch.Stop();
            Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            // stopwatch2 spans everything from here to the end of the method (the "TOTAL" line).
            var stopwatch2 = Stopwatch.StartNew();

            var sequence    = Sequence.CreateSequence(protSequence, modStr, aaSet);
            var proteinMass = sequence.Mass + Composition.H2O.Mass;

            Console.WriteLine("Mass = {0}", proteinMass);

            // Fail with a clear message instead of letting a null spectrum reach the
            // deconvoluter and scorer, where it would surface as a NullReferenceException.
            var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
            Assert.NotNull(spectrum, string.Format("Could not get a ProductSpectrum for scan {0} in {1}", scanNum, rawFile));

            var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(spectrum, minCharge, maxCharge,
                                                                  isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

            // Stage 2: time scorer construction and scoring-graph creation
            // (deconvolution above is deliberately outside this measurement).
            stopwatch.Restart();

            var scorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, spectrum, tolerance, comparer);
            var graph  = graphFactory.CreateScoringGraph(scorer, proteinMass);

            stopwatch.Stop();
            Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            // Stage 3: time the generating function computation.
            stopwatch.Restart();
            var gf = new GeneratingFunction(graph);

            gf.ComputeGeneratingFunction();
            stopwatch.Stop();
            Console.WriteLine(@"computing generation function = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            var scoreDist = gf.GetScoreDistribution();

            Console.WriteLine("{0}-{1}", scoreDist.MinScore, scoreDist.MaxScore);

            for (var score = 45; score <= gf.MaximumScore; score++)
            {
                var specEvalue = gf.GetSpectralEValue(score);
                Console.WriteLine("{0} : {1}", score, specEvalue);
            }

            stopwatch2.Stop();
            Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", (stopwatch2.ElapsedMilliseconds) / 1000.0d);
        }