コード例 #1
0
        public void TestDeconvolution()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            if (!File.Exists(TestRawFilePath))
            {
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
            }

            const int minScanNum = 46454;   // 635.43
            const int maxScanNum = 46661;   // 638.90
            const int MAX_POINTS = 50;

            var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath) as PbfLcMsRun;

            if (run == null)
            {
                return;
            }
            var summedSpec = run.GetSummedMs1Spectrum(minScanNum, maxScanNum);

            summedSpec.FilterNoise(50.0);
            // summedSpec.Display(MAX_POINTS);

            var deconvoluted = ProductScorerBasedOnDeconvolutedSpectra.GetDeconvolutedSpectrum(summedSpec, 2, 45, new Tolerance(10), 0.9, 2);

            deconvoluted.Display(MAX_POINTS);
        }
コード例 #2
0
        public void TestMs2Caching()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\DataFiles\SBEP_STM_001_02272012_Aragon.raw";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

            //const int minPrecursorIonCharge = 3; // 3
            //const int maxPrecursorIonCharge = 30;// 67
            //const int minProductIonCharge = 1;
            //const int maxProductIonCharge = 10;

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();
            var runCache = new ProductScorerBasedOnDeconvolutedSpectra(run);

            runCache.DeconvoluteAllProductSpectra();
            sw.Stop();

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
コード例 #3
0
 public FeatureBasedTagSearchEngine(
     LcMsRun run,
     Ms1FtParser featureParser,
     ProductScorerBasedOnDeconvolutedSpectra ms2Scorer,
     SequenceTagParser tagParser,
     FastaDatabase fastaDb,
     Tolerance tolerance,
     AminoAcidSet aaSet,
     double maxSequenceMass  = 50000.0,
     int minProductIonCharge = 1,
     int maxProductIonCharge = 20)
 {
     _run                 = run;
     _ms2Scorer           = ms2Scorer;
     _featureParser       = featureParser;
     _ms1FtFilter         = new Ms1FtFilter(run, tolerance, featureParser.Ms1FtFileName);
     _tagParser           = tagParser;
     _fastaDb             = fastaDb;
     _searchableDb        = new SearchableDatabase(fastaDb);
     _tolerance           = tolerance;
     _aaSet               = aaSet;
     _maxSequenceMass     = maxSequenceMass;
     _minProductIonCharge = minProductIonCharge;
     _maxProductIonCharge = maxProductIonCharge;
 }
コード例 #4
0
        public void TestPsm()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";

            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
            }

            const char   pre        = 'R';
            const string sequence   = "LENWPPASLADDL";
            const char   post       = 'A';
            const string annotation = "R.LENWPPASLADDL._";
            const int    charge     = 2;
            const int    ms2ScanNum = 25534;

            var aaSet = new AminoAcidSet();

            var run       = InMemoryLcMsRun.GetLcMsRun(specFilePath, 0, 0);
            var ms2Scorer = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 2, 10, 0, 1.1);

            ms2Scorer.DeconvoluteAllProductSpectra();
            var scorer = ms2Scorer.GetMs2Scorer(ms2ScanNum);

            var graph = SequenceGraph.CreateGraph(aaSet, annotation);

            graph.SetSink(0);
            var score = graph.GetFragmentScore(scorer);

            Console.WriteLine("Fast search score: " + score);
            var composition = graph.GetSinkSequenceCompositionWithH2O();

            var informedScorer = new InformedBottomUpScorer(run, aaSet, 1, 15, new Tolerance(10));
            var refinedScore   = informedScorer.GetScores(pre, sequence, post, composition, charge, ms2ScanNum);

            Console.WriteLine("RefinedScores: {0}", refinedScore);
        }
コード例 #5
0
ファイル: TestIcTopDown.cs プロジェクト: javamng/GitHUB
        public void TestPrSm()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            //const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownYufeng\raw\yufeng_column_test2.raw";
            //const string annotation =
            //    "_.MKTKLSVLSAAMLAATLTMMPAVSQAAIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVG" +
            //    "LHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTV" +
            //    "TSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVG" +
            //    "IGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGS" +
            //    "AAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEA" +
            //    "NQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDL" +
            //    "KSLKELLKDQEGAVALKIVRGKSMLYLVLR._";
            //var aaSet = new AminoAcidSet();

            //const int charge = 60;
            //const int ms2ScanNum = 46661;

            const string specFilePath = @"D:\Research\Data\Jon\AH_SF_mouseliver_3-1_Intact_2_6Feb14_Bane_PL011402.raw";

            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
            }

            const int    ms2ScanNum = 19011;
            const int    charge     = 7;
            const string annotation = "_.SKVSFKITLTSDPRLPYKVLSVPESTPFTAVLKFAAEEFKVPAATSAIITNDGIGINPAQTAGNVFLKHGSELRIIPRDRVGSC._";

            var acetylN    = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, true);
            var modVal     = Modification.RegisterAndGetModification("AddVal", new Composition(5, 9, 1, 1, 0));
            var searchMods = AminoAcid.StandardAminoAcidCharacters.Select(residue => new SearchModification(modVal, residue, SequenceLocation.Everywhere, false)).ToList();

            searchMods.Add(acetylN);
            const int numMaxModsPerProtein = 1;
            var       aaSet = new AminoAcidSet(searchMods, numMaxModsPerProtein);

            var graph = SequenceGraph.CreateGraph(aaSet, annotation);

            Console.WriteLine("NumProteoforms: " + graph.GetNumProteoformCompositions());

            var run       = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);
            var ms2Scorer = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 15);

            ms2Scorer.GetScorer(ms2ScanNum);
            var scorer = ms2Scorer.GetMs2Scorer(ms2ScanNum);

            Assert.NotNull(scorer, "Scorer is null!");

            for (var i = 0; i < graph.GetNumProteoformCompositions(); i++)
            {
                graph.SetSink(i);
                Console.WriteLine("ModComb: " + graph.GetModificationCombinations()[i]);
                var score = graph.GetFragmentScore(scorer);
                Console.WriteLine("Fast search score: " + score);
                var composition = graph.GetSinkSequenceCompositionWithH2O();

                var informedScorer = new InformedTopDownScorer(run, aaSet, 1, 30, new Tolerance(10));
                var refinedScore   = informedScorer.GetScores(AminoAcid.ProteinNTerm, SimpleStringProcessing.GetStringBetweenDots(annotation), AminoAcid.ProteinCTerm, composition, charge, ms2ScanNum);
                Console.WriteLine("Modifications: {0}", refinedScore.Modifications);
                Console.WriteLine("Composition: {0}", composition);
                Console.WriteLine("RefinedScores: {0}", refinedScore);
            }
        }
コード例 #6
0
        public void FilteringEfficiency()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

            sw.Stop();

            Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const int minPrecursorCharge = 3;
            const int maxPrecursorCharge = 30;
            const int tolerancePpm       = 10;
            var       tolerance          = new Tolerance(tolerancePpm);

            sw.Reset();
            sw.Start();
            //var ms1BasedFilter = new Ms1BasedFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm);
            //
            //var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 20);
            //var ms1BasedFilter = new ProductScorerBasedOnDeconvolutedSpectra(run,
            //    minPrecursorCharge, maxPrecursorCharge,
            //    0, 0,
            //    600.0, 1800.0, new Tolerance(tolerancePpm), null);
            //ms1BasedFilter.CachePrecursorMatchesBinCentric();
            var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.5, 0.5, 0.5, 40);

            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

            sw.Stop();

            Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

            ISequenceFilter ms1Filter = ms1BasedFilter;

            sw.Reset();
            sw.Start();
            const double minProteinMass = 3000.0;
            const double maxProteinMass = 30000.0;
            var          minBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
            var          maxBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
            var          numComparisons = 0L;

            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
                numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
            }
            sw.Stop();

            Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon_4PTMs.icresult";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var tsvReader    = new TsvFileParser(resultFilePath);
            var compositions = tsvReader.GetData("Composition");
            var scanNums     = tsvReader.GetData("ScanNum");
            var charges      = tsvReader.GetData("Charge");
            var scores       = tsvReader.GetData("Score");
            var qvalues      = tsvReader.GetData("QValue");
            var sequences    = tsvReader.GetData("Sequence");

            var sequenceCount = new Dictionary <string, int>();

            for (var i = 0; i < compositions.Count; i++)
            {
                if (qvalues != null)
                {
                    var qValue = Convert.ToDouble(qvalues[i]);
                    if (qValue > 0.01)
                    {
                        continue;
                    }
                }
                else
                {
                    var score = Convert.ToDouble(scores[i]);
                    if (score < 13)
                    {
                        continue;
                    }
                }
                var scanNum      = Convert.ToInt32(scanNums[i]);
                var charge       = Convert.ToInt32(charges[i]);
                var composition  = Composition.Parse(compositions[i]);
                var precursorIon = new Ion(composition, charge);
                var isValid      = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid)
                {
                    continue;
                }

                var sequence = sequences[i];
                if (sequenceCount.TryGetValue(sequence, out var count))
                {
                    sequenceCount[sequence] = count + 1;
                }
                else
                {
                    sequenceCount[sequence] = 1;
                }
            }
            //var sequences = tsvReader.GetData("Annotation");

            var seqSet             = new HashSet <string>();
            var allSeqSet          = new HashSet <string>();
            var numUnfilteredSpecs = 0;
            var totalSpecs         = 0;

            for (var i = 0; i < compositions.Count; i++)
            {
                if (qvalues != null)
                {
                    var qValue = Convert.ToDouble(qvalues[i]);
                    if (qValue > 0.01)
                    {
                        continue;
                    }
                }
                else
                {
                    var score = Convert.ToDouble(scores[i]);
                    if (score < 13)
                    {
                        continue;
                    }
                }
                var scanNum      = Convert.ToInt32(scanNums[i]);
                var charge       = Convert.ToInt32(charges[i]);
                var composition  = Composition.Parse(compositions[i]);
                var precursorIon = new Ion(composition, charge);
                var isValid      = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid)
                {
                    continue;
                }

                ++totalSpecs;

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec    = run.GetSpectrum(precursorScanNum);
                var corr1            = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec    = run.GetSpectrum(nextScanNum);
                var corr2       = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
                if (corr3 == 1)
                {
                    numUnfilteredSpecs++;
                    seqSet.Add(sequences[i]);
                }
                allSeqSet.Add(sequences[i]);

                //var xic = run.GetFullPrecursorIonExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance);
                ////xic.Display();
                //var apexScanNum = xic.GetNearestApexScanNum(run.GetPrecursorScanNum(scanNum), false);
                //var apexSpec = run.GetSpectrum(apexScanNum);
                //var corr3 = apexSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corrMax = new[] { corr1, corr2, corr3 }.Max();

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax, sequenceCount[sequences[i]]);
            }

            Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
            Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
            Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
            Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
コード例 #7
0
        public void FilteringEfficiencyQcShew()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

            sw.Stop();

            Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const int minPrecursorCharge = 3;
            const int maxPrecursorCharge = 30;
            const int tolerancePpm       = 10;
            var       tolerance          = new Tolerance(tolerancePpm);

            sw.Reset();
            sw.Start();
            var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40);

            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

            sw.Stop();

            Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

            ISequenceFilter ms1Filter = ms1BasedFilter;

            sw.Reset();
            sw.Start();
            const double minProteinMass = 3000.0;
            const double maxProteinMass = 30000.0;
            var          minBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
            var          maxBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
            var          numComparisons = 0L;

            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
                numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
            }
            sw.Stop();

            Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

            //const string prot =
            //    "ADVFHLGLTKAMLDGATLAIVPGDPERVKRIAELMDNATFLASHREYTSYLAYADGKPVVICSTGIGGPSTSIAVEELAQLGVNTFLRVGTTGAIQPHVNVGDVIVTQASVRLDGASLHFAPMEFPAVANFECTTAMVAACRDAGVEPHIGVTASSDTFYPGQERYDTVTGRVTRRFAGSMKEWQDMGVLNYEMESATLFTMCATQGWRAACVAGVIVNRTQQEIPDEATMKKTEVSAVSIVVAAAKKLLA";
            //var protMass = (new AminoAcidSet().GetComposition(prot) + Composition.H2O).Mass;
            //Console.WriteLine("************ScanNums: " + string.Join("\t", ms1Filter.GetMatchingMs2ScanNums(protMass)));

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var tsvReader = new TsvFileParser(resultFilePath);
            var scanNums  = tsvReader.GetData("Scan(s)");
            var charges   = tsvReader.GetData("Charge");
            var scores    = tsvReader.GetData("E-value");
            var sequences = tsvReader.GetData("Peptide");

            //const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_N30_C30.tsv";
            //var tsvReader = new TsvFileParser(resultFilePath);
            //var scanNums = tsvReader.GetData("ScanNum");
            //var charges = tsvReader.GetData("Charge");
            //var scores = tsvReader.GetData("Score");
            //var sequences = tsvReader.GetData("Sequence");

            var aaSet = new AminoAcidSet();

            var seqSet             = new HashSet <string>();
            var allSeqSet          = new HashSet <string>();
            var numUnfilteredSpecs = 0;
            var totalSpecs         = 0;

            for (var i = 0; i < scores.Count; i++)
            {
                var score = Convert.ToDouble(scores[i]);
                if (score > 1E-4)
                {
                    continue;
                }
                //if (score < 10) continue;

                var scanNum = Convert.ToInt32(scanNums[i]);
                var charge  = Convert.ToInt32(charges[i]);

                var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[i]);
                if (sequence == null || sequence.Contains("("))
                {
                    continue;
                }
                //var sequence = sequences[i];
                var composition = aaSet.GetComposition(sequence) + Composition.H2O;

                var precursorIon = new Ion(composition, charge);
                var isValid      = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid)
                {
                    continue;
                }
                ++totalSpecs;

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec    = run.GetSpectrum(precursorScanNum);
                var corr1            = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec    = run.GetSpectrum(nextScanNum);
                var corr2       = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
                if (corr3 == 1)
                {
                    numUnfilteredSpecs++;
                    seqSet.Add(sequences[i]);
                }
                allSeqSet.Add(sequences[i]);

                var corrMax = new[] { corr1, corr2, corr3 }.Max();

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax);
            }

            Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
            Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
            Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
            Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
コード例 #8
0
        public void Test43KProtein()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            // Configure amino acid set
            var acetylN              = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var oxM                  = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC             = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var glutathioneC         = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
            var dethiomethylM        = new SearchModification(Modification.Dethiomethyl, 'M', SequenceLocation.Everywhere, false);
            var deamidatedN          = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
            var deamidatedQ          = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);
            var pyroCarbamidomethylC = new SearchModification(Modification.PyroCarbamidomethyl, 'C',
                                                              SequenceLocation.ProteinNTerm, false);
            var phosphoS         = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
            var phosphoT         = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
            var phosphoY         = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);
            var nitrosylC        = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
            var nethylmaleimideC = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List <SearchModification>
            {
                dehydroC,
                glutathioneC,
                oxM,
                dethiomethylM,
                acetylN,
                //phosphoS,
                //phosphoT,
                //phosphoY,
                deamidatedN,
//                deamidatedQ,
                glutathioneC,
                pyroCarbamidomethylC,
                nitrosylC,
                nethylmaleimideC
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

//            var aaSet = new AminoAcidSet();

            if (!File.Exists(TestRawFilePath))
            {
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
            }

            var          run          = PbfLcMsRun.GetLcMsRun(TestRawFilePath);
            const string protSequence =
                "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";
            const string annotation = "_." + protSequence + "._";
            var          seqGraph   = SequenceGraph.CreateGraph(aaSet, AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);

            if (seqGraph == null)
            {
                return;
            }

            var ms1Filter        = new SimpleMs1Filter();
            var ms2ScorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(run);

            foreach (var ms2ScanNum in Ms2ScanNums)
            {
                ms2ScorerFactory.GetScorer(ms2ScanNum);
            }

            for (var numNTermCleavages = 0; numNTermCleavages <= 0; numNTermCleavages++)
            {
                if (numNTermCleavages > 0)
                {
                    seqGraph.CleaveNTerm();
                }
                var numProteoforms = seqGraph.GetNumProteoformCompositions();
                var modCombs       = seqGraph.GetModificationCombinations();
                for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
                {
                    seqGraph.SetSink(modIndex);
                    var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
                    var sequenceMass           = protCompositionWithH2O.Mass;
                    var modCombinations        = modCombs[modIndex];

                    foreach (var ms2ScanNum in ms1Filter.GetMatchingMs2ScanNums(sequenceMass))
                    {
                        var spec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                        if (spec == null)
                        {
                            continue;
                        }
                        var charge =
                            (int)
                            Math.Round(sequenceMass /
                                       (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));
                        var scorer = ms2ScorerFactory.GetMs2Scorer(ms2ScanNum);
                        var score  = seqGraph.GetFragmentScore(scorer);
                        if (score <= 3)
                        {
                            continue;
                        }

                        var precursorIon = new Ion(protCompositionWithH2O, charge);
                        var sequence     = protSequence.Substring(numNTermCleavages);
                        var pre          = numNTermCleavages == 0 ? annotation[0] : annotation[numNTermCleavages + 1];
                        var post         = annotation[annotation.Length - 1];

                        Console.WriteLine("{0}.{1}.{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}", pre, sequence, post, ms2ScanNum, modCombinations,
                                          precursorIon.GetMostAbundantIsotopeMz(), precursorIon.Charge, precursorIon.Composition.Mass, score);
                    }
                }
            }
        }
コード例 #9
0
        public bool RunSearch(double corrThreshold)
        {
            var sw = new Stopwatch();

            ErrorMessage = string.Empty;

            Console.Write(@"Reading raw file...");
            sw.Start();
            _run            = InMemoryLcMsRun.GetLcMsRun(SpecFilePath, 1.4826, 1.4826);
            _bottomUpScorer = new InformedBottomUpScorer(_run, AminoAcidSet, MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
            sw.Stop();
            var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

            sw.Reset();
            Console.Write(@"Determining precursor masses...");
            sw.Start();
            var ms1Filter = new Ms1IsotopeAndChargeCorrFilter(_run, PrecursorIonTolerance, MinPrecursorIonCharge, MaxPrecursorIonCharge,
                                                              400, 5000, corrThreshold, 0, 0);

            sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

            sw.Reset();
            Console.Write(@"Deconvoluting MS2 spectra...");
            sw.Start();
            _ms2ScorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(
                _run,
                MinProductIonCharge, MaxProductIonCharge,
                new Tolerance(10),
                0
                );
            _ms2ScorerFactory.DeconvoluteAllProductSpectra();
            sw.Stop();
            sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

            // Target database
            var targetDb = new FastaDatabase(DatabaseFilePath);

            //            string dirName = OutputDir ?? Path.GetDirectoryName(SpecFilePath);

            var baseName             = Path.GetFileNameWithoutExtension(SpecFilePath);
            var targetOutputFilePath = Path.Combine(OutputDir, baseName + TargetFileExtension);
            var decoyOutputFilePath  = Path.Combine(OutputDir, baseName + DecoyFileExtension);
            var tdaOutputFilePath    = Path.Combine(OutputDir, baseName + TdaFileExtension);

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
            {
                sw.Reset();
                Console.Write(@"Reading the target database...");
                sw.Start();
                targetDb.Read();
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

                sw.Reset();
                Console.WriteLine(@"Searching the target database");
                sw.Start();
                var targetMatches = RunSearch(GetAnnotationsAndOffsets(targetDb), ms1Filter, false);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Target database search elapsed time: {0:f4} sec", sec);

                sw.Reset();
                Console.Write(@"Rescoring and writing target results...");
                sw.Start();
                WriteResultsToFile(targetMatches, targetOutputFilePath, targetDb);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed time: {0:f4} sec", sec);
            }

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
            {
                // Decoy database
                sw.Reset();
                Console.Write(@"Reading the decoy database...");
                sw.Start();
                var decoyDb = targetDb.Decoy(Enzyme);
                decoyDb.Read();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

                sw.Reset();
                Console.WriteLine(@"Searching the decoy database");
                sw.Start();
                var decoyMatches = RunSearch(GetAnnotationsAndOffsets(decoyDb), ms1Filter, true);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Decoy database search elapsed Time: {0:f4} sec", sec);

                sw.Reset();
                Console.Write(@"Rescoring and writing decoy results...");
                sw.Start();
                WriteResultsToFile(decoyMatches, decoyOutputFilePath, decoyDb);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed time: {0:f4} sec", sec);
            }

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
            {
                var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
                if (fdrCalculator.HasError())
                {
                    ErrorMessage = fdrCalculator.ErrorMessage;
                    Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
                    return(false);
                }

                fdrCalculator.WriteTo(tdaOutputFilePath);
            }

            Console.WriteLine(@"Done");
            return(true);
        }