Example #1
0
        /// <summary>
        /// Prints, for a fixed protein sequence, the most abundant isotope m/z at charges 22 through 60,
        /// followed by the m/z and ratio of every isotope that GetIsotopes(0.1) returns for the charge-29 ion.
        /// </summary>
        public void GetIsoProfile()
        {
            Utils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            const string protSequence =
                "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";
            const int firstCharge   = 22;
            const int lastCharge    = 60;
            const int profileCharge = 29;

            var graph = SequenceGraph.CreateGraph(new AminoAcidSet(), AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);

            // Nothing to report when no graph could be built for the sequence.
            if (graph == null)
            {
                return;
            }

            graph.SetSink(0);

            // The composition used below is the sink composition (with H2O) minus one hydrogen.
            var neutralComposition = graph.GetSinkSequenceCompositionWithH2O() - Composition.Hydrogen;

            // Charge state vs. most abundant isotope m/z.
            for (var z = firstCharge; z <= lastCharge; z++)
            {
                var chargedIon = new Ion(neutralComposition, z);
                Console.WriteLine("{0}\t{1}", z, chargedIon.GetMostAbundantIsotopeMz());
            }

            // Isotope listing for a single charge state.
            var profileIon = new Ion(neutralComposition, profileCharge);

            foreach (var iso in profileIon.GetIsotopes(0.1))
            {
                Console.WriteLine("{0}\t{1}", profileIon.GetIsotopeMz(iso.Index), iso.Ratio);
            }
        }
Example #2
0
        /// <summary>
        /// Builds a sequence graph for "_.MARTKQTARK._" with methylation on K and oxidation on M
        /// (at most two modifications) and prints every sequence composition with its mass.
        /// </summary>
        public void TestBuildingReverseGraph()
        {
            TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            const string annotation           = "_.MARTKQTARK._";
            const int    numMaxModsPerProtein = 2;

            // Search modifications applied when building the amino acid set.
            var searchModifications = new List<SearchModification>
            {
                new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false),
                //new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false)
            };

            var aminoAcids = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var graph      = SequenceGraph.CreateGraph(aminoAcids, annotation);

            // One output line per composition: composition, tab, mass.
            foreach (var seqComposition in graph.GetSequenceCompositions())
            {
                Console.WriteLine("{0}\t{1}", seqComposition, seqComposition.Mass);
            }
        }
Example #3
0
        /// <summary>
        /// Builds a sequence graph for a long protein annotation with a protein-N-terminal PyroGluQ
        /// search modification and prints, for every sequence composition, the first five and last
        /// five fragment compositions (with "..." in between).
        /// </summary>
        public void TestBuildingSequenceGraphLongProtein()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            // Configure amino acid set
            const int numMaxModsPerProtein = 6;
            var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);

            // The modifications below are only referenced by the commented-out list entries;
            // they are kept so the entries can be re-enabled without further edits.
            // Fixed: dehydro was constructed from Modification.PyroGluQ (copy-paste error).
            var dehydro      = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var cysteinylC   = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
            var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
            var oxM          = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

            var searchModifications = new List<SearchModification>
            {
                pyroGluQ,
                //dehydro,
                //cysteinylC,
                //glutathioneC,
                //oxM
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

            //const string protAnnotation = "A.HAHLTHQYPAANAQVTAAPQAITLNFSEGVETGFSGAKITGPKNENIKTLPAKRNEQDQKQLIVPLADSLKPGTYTVDWHVVSVDGHKTKGHYTFSVK.-";
            //const string protAnnotation =
            //    "_.QQ._";

            const string protAnnotation =
                "_.MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAALRKLMMNHQ._";

            var seqGraph        = SequenceGraph.CreateGraph(aaSet, protAnnotation);
            var seqCompositions = seqGraph.GetSequenceCompositions();

            for (var modIndex = 0; modIndex < seqCompositions.Length; modIndex++)
            {
                var seqComposition = seqCompositions[modIndex];
                Console.WriteLine("SequenceComposition: {0}", seqComposition);

                var compIndex    = 0;
                var compositions = seqGraph.GetFragmentCompositions(modIndex, 0).ToList();
                foreach (var composition in compositions)
                {
                    // Print only the head and tail of the fragment list to keep the output short.
                    if (compIndex < 5 || compIndex >= compositions.Count - 5)
                    {
                        Console.WriteLine("  Seq: {0}, Frag: {1}", seqComposition, composition);
                    }
                    else if (compIndex == 5)
                    {
                        // Single marker line standing in for the omitted middle section.
                        Console.WriteLine("  ...");
                    }

                    compIndex++;
                }
            }
        }
Example #4
0
        /// <summary>
        /// Sums the observed isotope peak intensities of a fixed protein over charges 22 through 45
        /// in a summed MS1 spectrum (scans 46437-46466) and prints the theoretical profile next to
        /// the max-normalized observed profile, followed by their Pearson correlation.
        /// The test is ignored when the raw file at TestRawFilePath does not exist.
        /// </summary>
        public void TestSumIsoProfilesAcrossDifferentCharges()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            if (!File.Exists(TestRawFilePath))
            {
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
            }

            // The "as" cast yields null when the returned run is not a PbfLcMsRun; fail with a clear
            // assertion instead of a NullReferenceException on the next line.
            var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath) as PbfLcMsRun;
            Assert.True(run != null, "GetLcMsRun did not return a PbfLcMsRun for " + TestRawFilePath);

            //var spec = run.GetSpectrum(46452); // 635.37
            var spec      = run.GetSummedMs1Spectrum(46437, 46466);
            var tolerance = new Tolerance(10);

            const string protSequence =
                "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";
            //const string annotation = "_." + protSequence + "._";
            var seqGraph = SequenceGraph.CreateGraph(new AminoAcidSet(), AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);

            if (seqGraph == null)
            {
                return;
            }
            seqGraph.SetSink(0);
            var neutral = seqGraph.GetSinkSequenceCompositionWithH2O();

            var theoProfile = neutral.GetIsotopomerEnvelopeRelativeIntensities();
            var expProfile  = new double[theoProfile.Length];

            // Accumulate the observed intensity per isotope index across all charge states.
            for (var charge = 22; charge <= 45; charge++)
            {
                var ion          = new Ion(neutral, charge);
                var isotopePeaks = spec.GetAllIsotopePeaks(ion, tolerance, 0.1);
                if (isotopePeaks == null)
                {
                    continue;
                }
                Assert.True(isotopePeaks.Length == theoProfile.Length);
                for (var i = 0; i < isotopePeaks.Length; i++)
                {
                    if (isotopePeaks[i] != null)
                    {
                        expProfile[i] += isotopePeaks[i].Intensity;
                    }
                }
            }

            // The normalization factor does not change while printing, so compute it once.
            // Max() throws on an empty sequence; the guard keeps the empty-profile case a no-op.
            var maxIntensity = expProfile.Length > 0 ? expProfile.Max() : 0.0;
            for (var i = 0; i < theoProfile.Length; i++)
            {
                Console.WriteLine("{0}\t{1}\t{2}", neutral.GetIsotopeMass(i), theoProfile[i], expProfile[i] / maxIntensity);
            }
            Console.WriteLine("Corr: " + FitScoreCalculator.GetPearsonCorrelation(theoProfile, expProfile));
        }
Example #5
0
        /// <summary>
        /// Scores a sequence against a product spectrum: among the modification combinations whose
        /// composition (with H2O) equals <paramref name="composition"/>, picks the one with the highest
        /// fragment score and returns the composite scores of the resulting modified sequence.
        /// </summary>
        /// <param name="spec">Product spectrum to score against; null yields a null result.</param>
        /// <param name="seqStr">Sequence string used to build the sequence graph.</param>
        /// <param name="composition">Composition that a modification combination must equal to be considered.</param>
        /// <param name="charge">Precursor charge; also caps the maximum product charge used by the scorer.</param>
        /// <param name="ms2ScanNum">MS2 scan number passed on to GetCompositeScores.</param>
        /// <returns>
        /// The scores for the best modification combination, or null when <paramref name="spec"/> is null,
        /// the graph cannot be built, or no combination matches <paramref name="composition"/>.
        /// </returns>
        public IcScores GetScores(ProductSpectrum spec, string seqStr, Composition composition, int charge, int ms2ScanNum)
        {
            if (spec == null)
            {
                return(null);
            }
            var scorer   = new CompositeScorer(spec, Tolerance, MinProductCharge, Math.Min(MaxProductCharge, charge), activationMethod: ActivationMethod);
            var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, AminoAcid.ProteinNTerm, seqStr, AminoAcid.ProteinCTerm);

            if (seqGraph == null)
            {
                return(null);
            }

            var bestScore = double.NegativeInfinity;
            Tuple <double, string> bestScoreAndModifications = null;
            var protCompositions = seqGraph.GetSequenceCompositions();

            for (var modIndex = 0; modIndex < protCompositions.Length; modIndex++)
            {
                seqGraph.SetSink(modIndex);
                var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();

                // Only modification combinations that reproduce the requested composition are scored.
                if (!protCompositionWithH2O.Equals(composition))
                {
                    continue;
                }

                var curScoreAndModifications = seqGraph.GetFragmentScoreAndModifications(scorer);

                // Other callers in this code base treat the result as nullable; skip instead of
                // dereferencing a null tuple.
                if (curScoreAndModifications == null)
                {
                    continue;
                }

                var curScore = curScoreAndModifications.Item1;

                // Written as a negated ">" so that a NaN score is never selected as best.
                if (!(curScore > bestScore))
                {
                    continue;
                }

                bestScoreAndModifications = curScoreAndModifications;
                bestScore = curScore;
            }

            if (bestScoreAndModifications == null)
            {
                return(null);
            }

            var modifications = bestScoreAndModifications.Item2;
            var seqObj        = Sequence.CreateSequence(seqStr, modifications, AminoAcidSet);

            double score;
            int    nMatchedFragments;

            GetCompositeScores(seqObj, charge, ms2ScanNum, out score, out nMatchedFragments);
            return(new IcScores(nMatchedFragments, score, modifications));
        }
Example #6
0
        /// <summary>
        /// Builds a sequence graph for "_.MARTKQTARK._" with methylation on K and oxidation on M
        /// (at most four modifications) and prints the number of proteoform sequences per
        /// modification combination and per number of modifications.
        /// </summary>
        public void TestNumberOfProteoforms()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            //const string annotation = "_.AMCMC._";
            const string annotation = "_.MARTKQTARK._";

            // Configure amino acid set.
            // Several modifications are only referenced by commented-out list entries below;
            // they are kept so those entries can be re-enabled without further edits.
            var methylK          = new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false);
            var pyroGluQ         = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
            var oxM              = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var carbamidomethylC = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);
            var methylC          = new SearchModification(Modification.Methylation, 'C', SequenceLocation.Everywhere, true);
            var acetylN          = new SearchModification(Modification.Acetylation, '*', SequenceLocation.PeptideNTerm, false);

            var searchModifications = new List<SearchModification>
            {
                //carbamidomethylC,
                //methylC,
                methylK,
                //pyroGluQ,
                oxM,
                //acetylN
            };

            const int numMaxModsPerProtein = 4;

            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

            var seqGraph         = SequenceGraph.CreateGraph(aaSet, annotation);
            var protCompositions = seqGraph.GetSequenceCompositions();
            var modCombs         = seqGraph.GetModificationCombinations();

            // Output labels had typos ("Protoeoforms", "modificaionts", "modificaitons"); corrected here.
            Console.WriteLine("\n#Proteoforms by mod combinations: ");
            for (var modIndex = 0; modIndex < protCompositions.Length; modIndex++)
            {
                // Index 0 is reported as the unmodified form rather than by its combination text.
                Console.Write((modIndex == 0) ? "No modifications" : modCombs[modIndex].ToString());
                Console.Write("\t");
                Console.WriteLine("{0}", seqGraph.GetNumProteoformSequences(modIndex));
            }

            Console.WriteLine("\n#Proteoforms by number of modifications: ");
            for (var nMod = 0; nMod <= numMaxModsPerProtein; nMod++)
            {
                Console.Write("#modifications = {0}", nMod);
                Console.Write("\t");
                Console.WriteLine("{0}", seqGraph.GetNumProteoformSequencesByNumMods(nMod));
            }
        }
Example #7
0
        /// <summary>
        /// Builds a sequence graph for histone H4 with acetylation, (di/tri)methylation and
        /// phosphorylation search modifications (at most 11 per protein) and prints the graph's
        /// fragment-composition and proteoform-composition counts.
        /// </summary>
        public void TestCreatingHistoneGraph()
        {
            TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            const int numMaxModsPerProtein = 11;

            // Histone H4
            const string annotation =
                "_.MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG._";

            // Histone H3.1
//            const string annotation =
//                "_.MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA._";

            // Search modifications, listed in the order they are handed to the amino acid set.
            var searchModifications = new List<SearchModification>
            {
                new SearchModification(Modification.Acetylation, 'R', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Acetylation, 'K', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Methylation, 'R', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.DiMethylation, 'R', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.DiMethylation, 'K', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.TriMethylation, 'R', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false)
            };

            var histoneGraph = SequenceGraph.CreateGraph(new AminoAcidSet(searchModifications, numMaxModsPerProtein), annotation);

            var fragmentCompositionCount = histoneGraph.GetNumFragmentCompositions();
            var proteoformCount          = histoneGraph.GetNumProteoformCompositions();
            // NOTE(review): this calls the same method as proteoformCount although it is printed under
            // a different label — possibly a copy-paste slip; confirm which graph method was intended.
            var sequenceCompositionCount = histoneGraph.GetNumProteoformCompositions();

            Console.WriteLine("NumFragmentCompositions: " + fragmentCompositionCount);
            Console.WriteLine("NumProteoforms: " + proteoformCount);
            Console.WriteLine("NumSequenceCompositions: " + sequenceCompositionCount);
        }
Example #8
0
        /// <summary>
        /// For every modification combination of "_.MARTKQTARK._" (methylation on K, oxidation on M,
        /// at most two modifications) prints the composition and mass, and — when available — the
        /// fragment score and modification string obtained with a DummyScorer.
        /// </summary>
        public void TestGettingSequence()
        {
            TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            //const string annotation = "_.AMCMC._";
            const string annotation           = "_.MARTKQTARK._";
            const int    numMaxModsPerProtein = 2;

            // Candidate modifications; only some of them are enabled in the list below.
            var methylK          = new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false);
            var pyroGluQ         = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
            var oxM              = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var carbamidomethylC = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);
            var methylC          = new SearchModification(Modification.Methylation, 'C', SequenceLocation.Everywhere, true);
            var acetylN          = new SearchModification(Modification.Acetylation, '*', SequenceLocation.PeptideNTerm, false);

            var enabledModifications = new List<SearchModification>
            {
                //carbamidomethylC,
                //methylC,
                methylK,
                //pyroGluQ,
                oxM,
                //acetylN
            };

            var graph = SequenceGraph.CreateGraph(
                new AminoAcidSet(enabledModifications, numMaxModsPerProtein),
                annotation);
            var compositions = graph.GetSequenceCompositions();

            for (var sinkIndex = 0; sinkIndex < compositions.Length; sinkIndex++)
            {
                // Select the modification combination before asking the graph for a score.
                graph.SetSink(sinkIndex);

                var current = compositions[sinkIndex];
                Console.WriteLine("{0}\t{1}", current, current.Mass);

                var scoreAndMods = graph.GetFragmentScoreAndModifications(new DummyScorer());
                if (scoreAndMods == null)
                {
                    continue;
                }

                Console.WriteLine("Score: {0}, Modifications: {1}", scoreAndMods.Item1, scoreAndMods.Item2);
            }
        }
Example #9
0
        /// <summary>
        /// Among the modification combinations of <paramref name="seqStr"/> whose composition (with H2O)
        /// equals <paramref name="composition"/>, picks the one with the highest fragment score under
        /// <paramref name="scorer"/> and reports the matched-fragment count and user-visible score that
        /// <paramref name="informedScorer"/> assigns to the resulting modified sequence.
        /// </summary>
        /// <param name="informedScorer">Scorer used for the final matched-fragment count and score.</param>
        /// <param name="scorer">Scorer used to rank modification combinations on the graph.</param>
        /// <param name="seqStr">Sequence string used to build the sequence graph.</param>
        /// <param name="composition">Composition that a modification combination must equal to be considered.</param>
        /// <returns>
        /// The scores for the best modification combination, or null when the graph cannot be built
        /// or no combination matches <paramref name="composition"/>.
        /// </returns>
        public IcScores GetIcScores(IInformedScorer informedScorer, IScorer scorer, string seqStr, Composition composition)
        {
            var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, AminoAcid.ProteinNTerm, seqStr, AminoAcid.ProteinCTerm);

            if (seqGraph == null)
            {
                return(null);
            }

            var bestScore = double.NegativeInfinity;
            Tuple <double, string> bestScoreAndModifications = null;
            var protCompositions = seqGraph.GetSequenceCompositions();

            for (var modIndex = 0; modIndex < protCompositions.Length; modIndex++)
            {
                seqGraph.SetSink(modIndex);
                var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();

                // Only modification combinations that reproduce the requested composition are scored.
                if (!protCompositionWithH2O.Equals(composition))
                {
                    continue;
                }

                var curScoreAndModifications = seqGraph.GetFragmentScoreAndModifications(scorer);

                // Other callers in this code base treat the result as nullable; skip instead of
                // dereferencing a null tuple.
                if (curScoreAndModifications == null)
                {
                    continue;
                }

                var curScore = curScoreAndModifications.Item1;

                // Written as a negated ">" so that a NaN score is never selected as best.
                if (!(curScore > bestScore))
                {
                    continue;
                }

                bestScoreAndModifications = curScoreAndModifications;
                bestScore = curScore;
            }

            if (bestScoreAndModifications == null)
            {
                return(null);
            }

            var modifications       = bestScoreAndModifications.Item2;
            var sequence            = Sequence.CreateSequence(seqStr, modifications, this.AminoAcidSet);
            var numMatchedFragments = informedScorer.GetNumMatchedFragments(sequence);
            var score = informedScorer.GetUserVisibleScore(sequence);

            return(new IcScores(numMatchedFragments, score, modifications));
        }
Example #10
0
        /// <summary>
        /// Builds a sequence graph with four cysteine modifications plus methionine oxidation
        /// (at most four per protein) and prints every sequence composition together with its
        /// modification combination, once before and once after calling CleaveNTerm().
        /// </summary>
        public void TestGraphWithModifications()
        {
            TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            const string annotation = "_.MIALNKTPQTIVFYKPYGVLCQFTDNSAHPRPTLKDYINLPDLYPVGRLDQDSEGLLLLTSNGKLQHRLAHREFAHQRTYFAQVEGSPTDEDLEPLRRGITFADYPTRPAIAKIITEPDFPPRNPPIRYRASIPTSWLSITLTEGRNRQVRRMTAAVGFPTLRLVRVQIQVTGRSPQQGKGKSAATWCLTLEGLSPGQWRPLTPWEENFCQQLLTGNPNGPWQKKFGDRR._";
            const int    numMaxModsPerProtein = 4;

            var oxM              = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC         = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var glutathioneC     = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
            var nitrosylC        = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
            var nethylmaleimideC = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);

            // Cysteine modifications first, oxidation last — same order as handed to the amino acid set.
            var aminoAcids = new AminoAcidSet(
                new List<SearchModification> { dehydroC, glutathioneC, nitrosylC, nethylmaleimideC, oxM },
                numMaxModsPerProtein);

            var graph = SequenceGraph.CreateGraph(aminoAcids, annotation);

            // Snapshot of the graph as built.
            var compositions = graph.GetSequenceCompositions();
            var combinations = graph.GetModificationCombinations();

            Console.WriteLine("*** Before cleavage: {0}", compositions.Length);
            for (var i = 0; i < compositions.Length; i++)
            {
                Console.WriteLine("SequenceComposition: {0}, ModComb: {1}", compositions[i], combinations[i]);
            }

            // Cleave the N-terminus and take a fresh snapshot.
            graph.CleaveNTerm();
            compositions = graph.GetSequenceCompositions();
            combinations = graph.GetModificationCombinations();

            Console.WriteLine("*** After cleavage: {0}", compositions.Length);
            for (var i = 0; i < compositions.Length; i++)
            {
                Console.WriteLine("SequenceComposition: {0}, ModComb: {1}", compositions[i], combinations[i]);
            }
        }
Example #11
0
        /// <summary>
        /// Builds a sequence graph for "_.STR._" with phosphorylation (S/T/Y), oxidation (M) and
        /// carbamidomethylation (C) search modifications, asserts that the graph's unmodified sequence
        /// composition equals the composition the amino acid set computes for the bare peptide, and
        /// prints every sequence composition with its index.
        /// </summary>
        public void TestSequenceGraph()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            ShowStarting(methodName);

            var phosPhoS            = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
            var phosPhoT            = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
            var phosPhoY            = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);
            var oxM                 = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var fixCarbamidomethylC = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);

            var searchModifications = new List<SearchModification> {
                phosPhoS, phosPhoT, phosPhoY, oxM, fixCarbamidomethylC
            };
            //var searchModifications = new List<SearchModification> { phosPhoT, fixCarbamidomethylC };
            const int numMaxModsPerPeptide = 2;

            var          aaSet      = new AminoAcidSet(searchModifications, numMaxModsPerPeptide);
            const string annotation = "_.STR._";

            // Strip the two-character flanks ("_." and "._") to get the bare peptide.
            var pepSeq = annotation.Substring(2, annotation.Length - 4);

            // Compute each composition once and reuse it for both printing and the assertion.
            var expectedComposition = aaSet.GetComposition(pepSeq);
            Console.WriteLine(expectedComposition);

            var graph = SequenceGraph.CreateGraph(aaSet, annotation);

            var unmodifiedComposition = graph.GetUnmodifiedSequenceComposition();
            Console.WriteLine(unmodifiedComposition);

            // Expected value first, actual second, so a failure message reports them the right way round.
            Assert.AreEqual(expectedComposition, unmodifiedComposition);

            Console.WriteLine("Annotation Compositions:");
            var index = 0;

            foreach (var composition in graph.GetSequenceCompositions())
            {
                Console.WriteLine(index + ": " + composition);
                index++;
            }

            //const int seqIndex = 1;
            //Console.WriteLine("Fragment Compositions (" + seqIndex +")");
            //var scoringGraph = graph.GetScoringGraph(seqIndex);
            //foreach (var composition in scoringGraph.GetCompositions())
            //{
            //    Console.WriteLine(composition);
            //}
        }
Example #12
0
        /// <summary>
        /// Scores one peptide-spectrum match (peptide LENWPPASLADDL, charge 2, MS2 scan 25534) twice:
        /// first with the fragment score from the sequence graph, then with InformedBottomUpScorer.
        /// The test is ignored when the hard-coded raw file does not exist.
        /// </summary>
        public void TestPsm()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";

            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
            }

            // Peptide under test.
            // NOTE(review): post is 'A' while the annotation's C-terminal flank is '_' — confirm which is intended.
            const string annotation = "R.LENWPPASLADDL._";
            const char   pre        = 'R';
            const string sequence   = "LENWPPASLADDL";
            const char   post       = 'A';
            const int    charge     = 2;
            const int    ms2ScanNum = 25534;

            var aminoAcids = new AminoAcidSet();
            var lcmsRun    = InMemoryLcMsRun.GetLcMsRun(specFilePath, 0, 0);

            // Fast score: deconvolute all product spectra, then score the graph against the target scan.
            var productScorer = new ProductScorerBasedOnDeconvolutedSpectra(lcmsRun, 1, 2, 10, 0, 1.1);
            productScorer.DeconvoluteAllProductSpectra();
            var scanScorer = productScorer.GetMs2Scorer(ms2ScanNum);

            var seqGraph = SequenceGraph.CreateGraph(aminoAcids, annotation);
            seqGraph.SetSink(0);
            var fastScore = seqGraph.GetFragmentScore(scanScorer);

            Console.WriteLine("Fast search score: " + fastScore);

            // Refined score: re-score the same match with the informed bottom-up scorer.
            var sinkComposition = seqGraph.GetSinkSequenceCompositionWithH2O();
            var bottomUpScorer  = new InformedBottomUpScorer(lcmsRun, aminoAcids, 1, 15, new Tolerance(10));
            var refinedScores   = bottomUpScorer.GetScores(pre, sequence, post, sinkComposition, charge, ms2ScanNum);

            Console.WriteLine("RefinedScores: {0}", refinedScores);
        }
Example #13
0
        /// <summary>
        /// Builds a sequence graph for "_.QARTKQTARK._" with protein-N-terminal acetylation and
        /// PyroGluQ (at most two modifications) and prints each sequence composition with its mass,
        /// before and after calling CleaveNTerm().
        /// </summary>
        public void TestNTermMods()
        {
            TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            const string annotation           = "_.QARTKQTARK._";
            const int    numMaxModsPerProtein = 2;

            // Candidate modifications; oxidation is declared but currently disabled in the list.
            var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

            var enabledModifications = new List<SearchModification>
            {
                acetylN,
                pyroGluQ,
                //oxM
            };

            var aminoAcids = new AminoAcidSet(enabledModifications, numMaxModsPerProtein);

            //aminoAcids.Display();
            var graph = SequenceGraph.CreateGraph(aminoAcids, annotation);

            // Compositions of the intact sequence.
            foreach (var seqComposition in graph.GetSequenceCompositions())
            {
                Console.WriteLine("{0}\t{1}", seqComposition, seqComposition.Mass);
            }

            // Compositions after N-terminal cleavage.
            Console.WriteLine("*** Cleave N-term");
            graph.CleaveNTerm();

            foreach (var seqComposition in graph.GetSequenceCompositions())
            {
                Console.WriteLine("{0}\t{1}", seqComposition, seqComposition.Mass);
            }
        }
Example #14
0
        /// <summary>
        /// Prints the correlation score of the charge-43 ion of a fixed protein against the summed MS1
        /// spectrum obtained via GetSummedMs1Spectrum(46562, 2.5).
        /// The test is ignored when the raw file at TestRawFilePath does not exist.
        /// </summary>
        public void TestSmartIsoWindowSumming()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            if (!File.Exists(TestRawFilePath))
            {
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
            }

            const string protSequence =
                "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";
            //const string annotation = "_." + protSequence + "._";
            var seqGraph = SequenceGraph.CreateGraph(new AminoAcidSet(), AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);

            if (seqGraph == null)
            {
                return;
            }
            seqGraph.SetSink(0);
            var neutral   = seqGraph.GetSinkSequenceCompositionWithH2O();
            var ion       = new Ion(neutral, 43);
            var tolerance = new Tolerance(10);

            const int targetMs2ScanNum = 46562;

            // The "as" cast yields null when the returned run is not a PbfLcMsRun; fail with a clear
            // assertion instead of a NullReferenceException on the next line.
            var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath) as PbfLcMsRun;
            Assert.True(run != null, "GetLcMsRun did not return a PbfLcMsRun for " + TestRawFilePath);

            var ms2Spec = run.GetSpectrum(targetMs2ScanNum) as ProductSpectrum;

            Assert.True(ms2Spec != null);

            // Only referenced by the commented-out window extraction below.
            var isoWindow = ms2Spec.IsolationWindow;
            //var prevScanNum = run.GetPrevScanNum(targetMs2ScanNum, 1);
            //var nextScanNum = run.GetNextScanNum(targetMs2ScanNum, 1);
            var summedSpec = run.GetSummedMs1Spectrum(targetMs2ScanNum, 2.5);

            //var windowSpec = summedSpec.GetPeakListWithin(isoWindow.MinMz, isoWindow.MaxMz);
            Console.WriteLine("Corr: " + summedSpec.GetCorrScore(ion, tolerance));
        }
Example #15
0
        /// <summary>
        /// Times how long it takes to read an indexed FASTA database and build a sequence graph,
        /// in parallel, for every annotation returned by AnnotationsAndOffsets(7, 40, 2, 2, Enzyme.Trypsin).
        /// The elapsed time in seconds is written to the console.
        /// </summary>
        private static void TestCountingPeptides()
        {
            var aaSet = new AminoAcidSet();

            var sw = Stopwatch.StartNew();

            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_002166_F86E3B2F.fasta";
            const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_003456_9B916A8B.fasta";
            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta";
            var db        = new FastaDatabase(dbFile);
            var indexedDb = new IndexedDatabase(db);

            indexedDb.Read();
            //var numPeptides = indexedDb.AnnotationsAndOffsetsNoEnzyme(7, 150).LongCount();
            var peptides = indexedDb.AnnotationsAndOffsets(7, 40, 2, 2, Enzyme.Trypsin);

            // Only graph construction is exercised here; the resulting graph is intentionally discarded.
            Parallel.ForEach(peptides, annotationAndOffset =>
            {
                SequenceGraph.CreateGraph(aaSet, annotationAndOffset.Annotation);
            });

            //Console.WriteLine("NumPeptides: {0}", numPeptides);
            sw.Stop();
            var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;

            Console.WriteLine(@"{0:f4} sec", sec);
        }
Example #16
0
 /// <summary>
 /// Writes a tab-separated table (Annotation, Charge, ScanNum) listing, for every peptide
 /// annotation from <c>PeptideEnumerator</c>, every sequence composition of its graph and
 /// every precursor charge in [MinCharge, MaxCharge], the scan numbers returned by
 /// <c>Run.GetFragmentationSpectraScanNums</c> for the corresponding precursor ion.
 /// </summary>
 /// <param name="outputFilePath">Path of the text file to write; an existing file is overwritten.</param>
 public void ScorePeptides(string outputFilePath)
 {
     using (var writer = new StreamWriter(outputFilePath))
     {
         writer.WriteLine("Annotation\tCharge\tScanNum");
         foreach (var annotation in PeptideEnumerator)
         {
             // annotation: pre + "." + peptide + "." + post (e.g. R.PEPTIDER.G)
             var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, annotation);
             if (seqGraph == null)
             {
                 // Other callers of CreateGraph treat a null graph as an invalid sequence;
                 // skip such annotations instead of failing with a NullReferenceException.
                 continue;
             }

             foreach (var sequenceComposition in seqGraph.GetSequenceCompositions())
             {
                 // The precursor is built from the sequence composition plus one water.
                 var peptideComposition = sequenceComposition + Composition.H2O;
                 for (var precursorCharge = MinCharge; precursorCharge <= MaxCharge; precursorCharge++)
                 {
                     var precursorIon = new Ion(peptideComposition, precursorCharge);
                     foreach (var scanNum in Run.GetFragmentationSpectraScanNums(precursorIon))
                     {
                         writer.WriteLine("{0}\t{1}\t{2}", annotation, precursorCharge, scanNum);
                     }
                 }
             }
         }
     }
 }
        /// <summary>
        /// Detects features for the current peptide and charge state: extracts the precursor
        /// XIC, runs watershed feature detection on it, repeats the detection for a fixed set of
        /// precursor isotope targets, and then for every known product ion type. Progress is
        /// reported through the background worker, and property-change notifications are raised
        /// for the feature list and both slice plots at the end.
        /// </summary>
        private void FindFeatures()
        {
            m_FeatureFinderBackgroundWorker.ReportProgress(0, "Finding 3-D Features for Precursor and Fragments");

            var sequence     = new Sequence(CurrentPeptide, m_aminoAcidSet);
            var precursorIon = sequence.GetPrecursorIon(CurrentChargeState);
            var monoMz       = precursorIon.GetMonoIsotopicMz();

            var uimfPointList      = UimfUtil.GetXic(monoMz, CurrentTolerance, UIMFData.FrameType.MS1, DataReader.ToleranceType.PPM);
            var watershedPointList = WaterShedMapUtil.BuildWatershedMap(uimfPointList);

            var smoother = new SavitzkyGolaySmoother(11, 2);

            smoother.Smooth(ref watershedPointList);

            FeatureList = FeatureDetection.DoWatershedAlgorithm(watershedPointList).ToList();

            IsotopeFeaturesDictionary.Clear();

            // Isotope offsets to probe; charge state 2 additionally probes the half-integer offsets.
            // The strings double as dictionary keys, so they are kept verbatim.
            var precursorTargetList = CurrentChargeState == 2
                ? new List<string> { "-1", "0.5", "1", "1.5", "2", "3" }
                : new List<string> { "-1", "1", "2", "3" };

            foreach (var precursorTarget in precursorTargetList)
            {
                // Parse with the invariant culture: the literals above always use '.' as the
                // decimal separator, which the current culture may interpret differently.
                var isotopeIndex = double.Parse(precursorTarget, System.Globalization.CultureInfo.InvariantCulture);
                var targetMz     = precursorIon.GetIsotopeMz(isotopeIndex);

                var isotopeUimfPointList      = UimfUtil.GetXic(targetMz, CurrentTolerance, UIMFData.FrameType.MS1, DataReader.ToleranceType.PPM);
                var isotopeWatershedPointList = WaterShedMapUtil.BuildWatershedMap(isotopeUimfPointList);

                // Unlike the monoisotopic and fragment maps, the isotope maps are not smoothed.
                var isotopeFeatures = FeatureDetection.DoWatershedAlgorithm(isotopeWatershedPointList).ToList();
                IsotopeFeaturesDictionary.Add(precursorTarget, isotopeFeatures);
            }

            LcSlicePlot  = new PlotModel();
            ImsSlicePlot = new PlotModel();

            FragmentFeaturesDictionary.Clear();
            var ionTypeDictionary = sequence.GetProductIons(m_ionTypeFactory.GetAllKnownIonTypes());

            // Kept as double so the progress ratio below uses floating-point division.
            double fragmentCount = ionTypeDictionary.Count;
            var    index         = 0;

            foreach (var ionTypeKvp in ionTypeDictionary)
            {
                var ionTypeTuple = ionTypeKvp.Key;

                var ion        = ionTypeKvp.Value;
                var fragmentMz = ion.GetMonoIsotopicMz();

                uimfPointList      = UimfUtil.GetXic(fragmentMz, CurrentTolerance, UIMFData.FrameType.MS2, DataReader.ToleranceType.PPM);
                watershedPointList = WaterShedMapUtil.BuildWatershedMap(uimfPointList);
                smoother.Smooth(ref watershedPointList);

                var fragmentFeatureBlobList = FeatureDetection.DoWatershedAlgorithm(watershedPointList).ToList();
                FragmentFeaturesDictionary.Add(ionTypeTuple, fragmentFeatureBlobList);

                index++;
                var progress = (int)((index / fragmentCount) * 100);
                m_FeatureFinderBackgroundWorker.ReportProgress(progress);
            }

            OnPropertyChanged("FeatureList");
            OnPropertyChanged("LcSlicePlot");
            OnPropertyChanged("ImsSlicePlot");
        }
Example #18
0
        /// <summary>
        /// For charges 2 through 60 of a fixed protein sequence, extracts the precursor XIC of
        /// the m/z bin that contains the most abundant isotope and prints its intensity -- either
        /// for every scan or (the compiled-in default) only for the column of scan 161.
        /// The test is ignored when the raw file is missing.
        /// </summary>
        public void TestGeneratingXicsOfAllCharges()
        {
            var testName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(testName);

            if (!File.Exists(TestRawFilePath))
            {
                Assert.Ignore(@"Skipping test " + testName + @" since file not found: " + TestRawFilePath);
            }

            var run       = PbfLcMsRun.GetLcMsRun(TestRawFilePath, 0.0, 0.0);
            var mzBinning = new MzComparerWithBinning(27);

            const string protSequence =
                "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";

            var proteinGraph = SequenceGraph.CreateGraph(new AminoAcidSet(), AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
            if (proteinGraph == null)
            {
                return;
            }

            proteinGraph.SetSink(0);
            var neutral  = proteinGraph.GetSinkSequenceCompositionWithH2O() - Composition.Hydrogen;
            var envelope = Averagine.GetIsotopomerEnvelope(neutral.Mass);

            // Compile-time switch: set to true to print every MS1 scan instead of just scan 161.
            const bool showAllScans = false;
            var        scanColumn   = 0;

            #pragma warning disable 0162
            if (showAllScans)
            {
                Console.WriteLine("Charge\t" + string.Join("\t", run.GetScanNumbers(1)));
            }
            else
            {
                // Just display data for scan 161: its column index is the number of
                // scans that precede it in the scan list.
                Console.WriteLine("Charge\t161");
                scanColumn = run.GetScanNumbers(1).TakeWhile(scanNumber => scanNumber != 161).Count();
            }
            #pragma warning restore 0162

            const int minCharge = 2;
            const int maxCharge = 60;
            foreach (var charge in Enumerable.Range(minCharge, maxCharge - minCharge + 1))
            {
                var chargedIon = new Ion(neutral, charge);
                var apexMz     = chargedIon.GetIsotopeMz(envelope.MostAbundantIsotopeIndex);

                // Extract the chromatogram over the whole bin that contains the apex m/z.
                var bin = mzBinning.GetBinNumber(apexMz);
                var xic = run.GetFullPrecursorIonExtractedIonChromatogram(mzBinning.GetMzStart(bin), mzBinning.GetMzEnd(bin));

                Console.Write(charge + "\t");

                #pragma warning disable 0162
                if (showAllScans)
                {
                    Console.WriteLine(string.Join("\t", xic.Select(p => p.Intensity)));
                }
                else
                {
                    Console.WriteLine(xic[scanColumn].Intensity);
                }
                #pragma warning restore 0162
            }
        }
Example #19
0
        /// <summary>
        /// Scores a fixed protein annotation (charge 9, modification index 0) against MS2 scan
        /// 3633 using <c>CorrMatchedPeakCounter</c>, printing the score and the elapsed time.
        /// The test is ignored when the raw file cannot be found.
        /// </summary>
        public void TestCorrMatchedPeakCounter()
        {
            const string protAnnotation = "_.TMNITSKQMEITPAIRQHVADRLAKLEKWQTHLINPHIILSKEPQGFIADATINTPNGHLVASAKHEDMYTAINELINKLERQLNKVQHKGEAR._";
            const string specFilePath   = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SBEP_STM_001_02272012_Aragon.raw";

            const int charge     = 9;
            const int modIndex   = 0;
            const int ms2ScanNum = 3633;

            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            // Parameters
            var precursorIonTolerance = new Tolerance(10);
            var productIonTolerance   = new Tolerance(10);

            var stopwatch  = new System.Diagnostics.Stopwatch();
            var aminoAcids = new AminoAcidSet();

            // Create a sequence graph
            var proteinGraph = SequenceGraph.CreateGraph(aminoAcids, protAnnotation);
            Assert.NotNull(proteinGraph, "Invalid sequence: {0}", protAnnotation);

            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
            }

            var lcmsRun = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);

            // Timing starts only after the run has been loaded.
            stopwatch.Start();
            var ms1Filter = new Ms1ContainsIonFilter(lcmsRun, precursorIonTolerance);

            var compositions = proteinGraph.GetSequenceCompositions();
            Console.WriteLine("Length: {0}\tNumCompositions: {1}", protAnnotation.Length - 4, compositions.Length);

            var selectedComposition = compositions[modIndex];
            var withWater           = selectedComposition + Composition.H2O;

            // Return value intentionally unused, as in the original flow.
            withWater.GetIsotopomerEnvelopeRelativeIntensities();

            Console.WriteLine("Composition: {0}, AveragineMass: {1}", selectedComposition, selectedComposition.Mass);
            proteinGraph.SetSink(modIndex);

            var precursor = new Ion(withWater, charge);
            Assert.True(ms1Filter.IsValid(precursor, ms2ScanNum));

            var productSpectrum = lcmsRun.GetSpectrum(ms2ScanNum) as ProductSpectrum;
            Assert.True(productSpectrum != null);

            var peakCounter   = new CorrMatchedPeakCounter(productSpectrum, productIonTolerance, 1, 10);
            var fragmentScore = proteinGraph.GetFragmentScore(peakCounter);

            Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", protAnnotation, charge, precursor.GetMostAbundantIsotopeMz(), ms2ScanNum, fragmentScore);

            stopwatch.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", stopwatch.Elapsed.TotalSeconds);
        }
Example #20
0
        /// <summary>
        /// Scores a fixed protein annotation (charge 6, modification index 0) against MS2 scan
        /// 4448 using <c>MatchedPeakCounter</c>, printing the score and the elapsed time.
        /// The test is ignored when the raw file cannot be found.
        /// </summary>
        public void TestMatchedPeakCounter()
        {
            const string protAnnotation = "_.MFQQEVTITAPNGLHTRPAAQFVKEAKGFTSEITVTSNGKSASAKSLFKLQTLGLTQGTVVTISAEGEDEQKAVEHLVKLMAELE._";
            const string specFilePath   = @"\\protoapps\UserData\Jungkap\Joshua\testData\SBEP_STM_001_02272012_Aragon.raw";

            const int charge     = 6;
            const int modIndex   = 0;
            const int ms2ScanNum = 4448;

            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            // Parameters
            var precursorIonTolerance = new Tolerance(15);
            var productIonTolerance   = new Tolerance(15);

            var stopwatch  = new System.Diagnostics.Stopwatch();
            var aminoAcids = new AminoAcidSet();

            // Create a sequence graph
            var proteinGraph = SequenceGraph.CreateGraph(aminoAcids, protAnnotation);
            Assert.NotNull(proteinGraph, "Invalid sequence: {0}", protAnnotation);

            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
            }

            var lcmsRun = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);

            // Timing starts only after the run has been loaded.
            stopwatch.Start();
            var ms1Filter = new Ms1ContainsIonFilter(lcmsRun, precursorIonTolerance);

            var compositions = proteinGraph.GetSequenceCompositions();
            Console.WriteLine("Length: {0}\tNumCompositions: {1}", protAnnotation.Length - 4, compositions.Length);

            var selectedComposition = compositions[modIndex];
            var withWater           = selectedComposition + Composition.H2O;

            // Return value intentionally unused, as in the original flow.
            withWater.GetIsotopomerEnvelopeRelativeIntensities();

            Console.WriteLine("Composition: {0}, AveragineMass: {1}", selectedComposition, selectedComposition.Mass);
            proteinGraph.SetSink(modIndex);

            var precursor = new Ion(withWater, charge);
            Assert.True(ms1Filter.IsValid(precursor, ms2ScanNum));

            var productSpectrum = lcmsRun.GetSpectrum(ms2ScanNum) as ProductSpectrum;
            Assert.True(productSpectrum != null);

            var peakCounter   = new MatchedPeakCounter(productSpectrum, productIonTolerance, 1, 10);
            var fragmentScore = proteinGraph.GetFragmentScore(peakCounter);

            Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", protAnnotation, charge, precursor.GetMostAbundantIsotopeMz(), ms2ScanNum, fragmentScore);

            stopwatch.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", stopwatch.Elapsed.TotalSeconds);
        }
Example #21
0
        }                                                         // true: target and decoy, false: target only, null: decoy only

        /// <summary>
        /// Quick protein identification prototype. First builds, for every fragment-mass bin,
        /// a bit vector over scan numbers marking the MS2 scans that contain a deconvoluted peak
        /// in that bin. Then, for every intact protein in the FASTA database (with zero and one
        /// N-terminal cleavage), counts per scan how many fragment-node masses (offset as Y ions)
        /// hit a marked bin, and reports the best-scoring protein for each MS2 scan.
        /// A trailing apostrophe on the protein name marks the N-terminally cleaved form.
        /// </summary>
        public void QuickId()
        {
            const string rawFilePath   = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
            const string modFilePath   = @"H:\Research\QCShew_TopDown\Production\Mods.txt";
            const int    numBits       = 29; // max error: 4ppm
            const int    minCharge     = 1;
            const int    maxCharge     = 20;
            var          tolerance     = new Tolerance(10);
            const double corrThreshold = 0.7;

            var          comparer        = new MzComparerWithBinning(numBits);
            const double minFragmentMass = 200.0;
            const double maxFragmentMass = 50000.0;
            var          minFragMassBin  = comparer.GetBinNumber(minFragmentMass);
            var          maxFragMassBin  = comparer.GetBinNumber(maxFragmentMass);

            var aminoAcidSet = new AminoAcidSet(modFilePath);

            var run           = PbfLcMsRun.GetLcMsRun(rawFilePath);
            var ms2ScanNumArr = run.GetScanNumbers(2).ToArray();

            var sw = new Stopwatch();

            sw.Start();
            Console.Write("Building Spectrum Arrays...");

            // One bit vector per mass bin in [minFragMassBin, maxFragMassBin], indexed by scan number.
            var massVectors = new BitArray[maxFragMassBin - minFragMassBin + 1];

            for (var i = minFragMassBin; i <= maxFragMassBin; i++)
            {
                massVectors[i - minFragMassBin] = new BitArray(run.MaxLcScan + 1);
            }

            foreach (var ms2ScanNum in ms2ScanNumArr)
            {
                var productSpec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                if (productSpec == null)
                {
                    continue;
                }

                var deconvolutedPeaks = Deconvoluter.GetDeconvolutedPeaks(productSpec.Peaks, minCharge, maxCharge, 2, 1.1, tolerance, corrThreshold);

                if (deconvolutedPeaks == null)
                {
                    continue;
                }

                foreach (var p in deconvolutedPeaks)
                {
                    // Mark every in-range bin that overlaps the tolerance window around the peak mass.
                    var mass      = p.Mass;
                    var deltaMass = tolerance.GetToleranceAsDa(mass, 1);
                    var minMass   = mass - deltaMass;
                    var maxMass   = mass + deltaMass;

                    var minBinNum = comparer.GetBinNumber(minMass);
                    var maxBinNum = comparer.GetBinNumber(maxMass);
                    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
                    {
                        if (binNum >= minFragMassBin && binNum <= maxFragMassBin)
                        {
                            massVectors[binNum - minFragMassBin][ms2ScanNum] = true;
                        }
                    }
                }
            }
            sw.Stop();
            Console.WriteLine(@"{0:f1} sec.", sw.Elapsed.TotalSeconds);

            sw.Reset();
            sw.Start();
            var fastaDb = new FastaDatabase(fastaFilePath);

            fastaDb.Read();
            var indexedDb   = new IndexedDatabase(fastaDb);
            var numProteins = 0;
            var intactProteinAnnotationAndOffsets =
                indexedDb.IntactSequenceAnnotationsAndOffsets(0, int.MaxValue);

            // Best match per scan number; a score of 0 / null name means no protein matched.
            var bestProtein = new string[run.MaxLcScan + 1];
            var bestScore   = new int[run.MaxLcScan + 1];

            foreach (var annotationAndOffset in intactProteinAnnotationAndOffsets)
            {
                if (++numProteins % 10 == 0)
                {
                    Console.WriteLine(@"Processing, {0} proteins done, {1:f1} sec elapsed",
                                      numProteins,
                                      sw.Elapsed.TotalSeconds);
                }
                var annotation = annotationAndOffset.Annotation;
                var offset     = annotationAndOffset.Offset;

                // Strip the two flanking characters on each side of the annotation (e.g. "_." and "._").
                var protSequence = annotation.Substring(2, annotation.Length - 4);

                // suffix
                var seqGraph = SequenceGraph.CreateGraph(aminoAcidSet, AminoAcid.ProteinNTerm, protSequence,
                                                         AminoAcid.ProteinCTerm);
                if (seqGraph == null)
                {
                    continue;
                }

                for (var numNTermCleavage = 0; numNTermCleavage <= 1; numNTermCleavage++)
                {
                    if (numNTermCleavage > 0)
                    {
                        seqGraph.CleaveNTerm();
                    }
                    var allCompositions = seqGraph.GetAllFragmentNodeCompositions();

                    var scoreArr = new int[run.MaxLcScan + 1];
                    foreach (var fragComp in allCompositions)
                    {
                        var suffixMass = fragComp.Mass + BaseIonType.Y.OffsetComposition.Mass;
                        var binNum     = comparer.GetBinNumber(suffixMass);
                        if (binNum < minFragMassBin || binNum > maxFragMassBin)
                        {
                            continue;
                        }

                        var vector = massVectors[binNum - minFragMassBin];
                        foreach (var ms2ScanNum in ms2ScanNumArr)
                        {
                            if (vector[ms2ScanNum])
                            {
                                ++scoreArr[ms2ScanNum];
                            }
                        }
                    }
                    foreach (var ms2ScanNum in ms2ScanNumArr)
                    {
                        if (scoreArr[ms2ScanNum] > bestScore[ms2ScanNum])
                        {
                            bestScore[ms2ScanNum] = scoreArr[ms2ScanNum];
                            var proteinName = fastaDb.GetProteinName(offset);
                            bestProtein[ms2ScanNum] = proteinName + (numNTermCleavage == 1 ? "'" : "");
                        }
                    }
                }
                // prefix
            }

            Console.WriteLine("ScanNum\tBestProtein\tScore");
            foreach (var ms2ScanNum in ms2ScanNumArr)
            {
                // Column order must match the header above: scan, protein, score.
                Console.WriteLine("{0}\t{1}\t{2}", ms2ScanNum, bestProtein[ms2ScanNum] ?? "", bestScore[ms2ScanNum]);
            }
        }
Example #22
0
        /// <summary>
        /// Scores every proteoform composition of a fixed annotation against MS2 scan 19011
        /// (charge 7): prints the fast graph-based fragment score and the refined scores from
        /// <c>InformedTopDownScorer</c>. The amino acid set allows at most one modification per
        /// protein, chosen from a custom "AddVal" modification on any standard residue or
        /// protein N-terminal acetylation. The test is ignored when the raw file is missing.
        /// </summary>
        public void TestPrSm()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string specFilePath = @"D:\Research\Data\Jon\AH_SF_mouseliver_3-1_Intact_2_6Feb14_Bane_PL011402.raw";

            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
            }

            const int    ms2ScanNum = 19011;
            const int    charge     = 7;
            const string annotation = "_.SKVSFKITLTSDPRLPYKVLSVPESTPFTAVLKFAAEEFKVPAATSAIITNDGIGINPAQTAGNVFLKHGSELRIIPRDRVGSC._";

            var acetylN    = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, true);
            var modVal     = Modification.RegisterAndGetModification("AddVal", new Composition(5, 9, 1, 1, 0));
            var searchMods = AminoAcid.StandardAminoAcidCharacters.Select(residue => new SearchModification(modVal, residue, SequenceLocation.Everywhere, false)).ToList();

            searchMods.Add(acetylN);
            const int numMaxModsPerProtein = 1;
            var       aaSet = new AminoAcidSet(searchMods, numMaxModsPerProtein);

            var graph = SequenceGraph.CreateGraph(aaSet, annotation);

            // Fail with a clear message rather than a NullReferenceException when no graph is built.
            Assert.NotNull(graph, "Invalid sequence: {0}", annotation);

            Console.WriteLine("NumProteoforms: " + graph.GetNumProteoformCompositions());

            var run       = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);
            var ms2Scorer = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 15);

            // NOTE(review): the result of GetScorer is discarded; presumably it builds and caches
            // the scorer that GetMs2Scorer returns below -- confirm against the scorer class.
            ms2Scorer.GetScorer(ms2ScanNum);
            var scorer = ms2Scorer.GetMs2Scorer(ms2ScanNum);

            Assert.NotNull(scorer, "Scorer is null!");

            for (var i = 0; i < graph.GetNumProteoformCompositions(); i++)
            {
                // Select the i-th proteoform as the sink before scoring it.
                graph.SetSink(i);
                Console.WriteLine("ModComb: " + graph.GetModificationCombinations()[i]);
                var score = graph.GetFragmentScore(scorer);
                Console.WriteLine("Fast search score: " + score);
                var composition = graph.GetSinkSequenceCompositionWithH2O();

                var informedScorer = new InformedTopDownScorer(run, aaSet, 1, 30, new Tolerance(10));
                var refinedScore   = informedScorer.GetScores(AminoAcid.ProteinNTerm, SimpleStringProcessing.GetStringBetweenDots(annotation), AminoAcid.ProteinCTerm, composition, charge, ms2ScanNum);
                Console.WriteLine("Modifications: {0}", refinedScore.Modifications);
                Console.WriteLine("Composition: {0}", composition);
                Console.WriteLine("RefinedScores: {0}", refinedScore);
            }
        }
Example #23
0
        public void Test43KProtein()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            // Configure amino acid set
            var acetylN              = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var oxM                  = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC             = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var glutathioneC         = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
            var dethiomethylM        = new SearchModification(Modification.Dethiomethyl, 'M', SequenceLocation.Everywhere, false);
            var deamidatedN          = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
            var deamidatedQ          = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);
            var pyroCarbamidomethylC = new SearchModification(Modification.PyroCarbamidomethyl, 'C',
                                                              SequenceLocation.ProteinNTerm, false);
            var phosphoS         = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
            var phosphoT         = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
            var phosphoY         = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);
            var nitrosylC        = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
            var nethylmaleimideC = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List <SearchModification>
            {
                dehydroC,
                glutathioneC,
                oxM,
                dethiomethylM,
                acetylN,
                //phosphoS,
                //phosphoT,
                //phosphoY,
                deamidatedN,
//                deamidatedQ,
                glutathioneC,
                pyroCarbamidomethylC,
                nitrosylC,
                nethylmaleimideC
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

//            var aaSet = new AminoAcidSet();

            if (!File.Exists(TestRawFilePath))
            {
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
            }

            var          run          = PbfLcMsRun.GetLcMsRun(TestRawFilePath);
            const string protSequence =
                "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";
            const string annotation = "_." + protSequence + "._";
            var          seqGraph   = SequenceGraph.CreateGraph(aaSet, AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);

            if (seqGraph == null)
            {
                return;
            }

            var ms1Filter        = new SimpleMs1Filter();
            var ms2ScorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(run);

            foreach (var ms2ScanNum in Ms2ScanNums)
            {
                ms2ScorerFactory.GetScorer(ms2ScanNum);
            }

            for (var numNTermCleavages = 0; numNTermCleavages <= 0; numNTermCleavages++)
            {
                if (numNTermCleavages > 0)
                {
                    seqGraph.CleaveNTerm();
                }
                var numProteoforms = seqGraph.GetNumProteoformCompositions();
                var modCombs       = seqGraph.GetModificationCombinations();
                for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
                {
                    seqGraph.SetSink(modIndex);
                    var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
                    var sequenceMass           = protCompositionWithH2O.Mass;
                    var modCombinations        = modCombs[modIndex];

                    foreach (var ms2ScanNum in ms1Filter.GetMatchingMs2ScanNums(sequenceMass))
                    {
                        var spec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                        if (spec == null)
                        {
                            continue;
                        }
                        var charge =
                            (int)
                            Math.Round(sequenceMass /
                                       (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));
                        var scorer = ms2ScorerFactory.GetMs2Scorer(ms2ScanNum);
                        var score  = seqGraph.GetFragmentScore(scorer);
                        if (score <= 3)
                        {
                            continue;
                        }

                        var precursorIon = new Ion(protCompositionWithH2O, charge);
                        var sequence     = protSequence.Substring(numNTermCleavages);
                        var pre          = numNTermCleavages == 0 ? annotation[0] : annotation[numNTermCleavages + 1];
                        var post         = annotation[annotation.Length - 1];

                        Console.WriteLine("{0}.{1}.{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}", pre, sequence, post, ms2ScanNum, modCombinations,
                                          precursorIon.GetMostAbundantIsotopeMz(), precursorIon.Charge, precursorIon.Composition.Mass, score);
                    }
                }
            }
        }
Example #24
0
        /// <summary>
        /// Scores a peptide against one MS2 scan and adjusts the fragment score with
        /// credits/penalties for the cleavage state of both termini.
        /// </summary>
        /// <param name="pre">Residue (or delimiter) immediately preceding the peptide.</param>
        /// <param name="sequence">Peptide sequence; its last residue is inspected for the C-terminal rule.</param>
        /// <param name="post">Residue (or delimiter) immediately following the peptide.</param>
        /// <param name="nTerm">N-terminal amino acid passed to the sequence graph.</param>
        /// <param name="cTerm">C-terminal amino acid passed to the sequence graph.</param>
        /// <param name="composition">Composition (with H2O) that a proteoform must equal to be scored.</param>
        /// <param name="charge">Precursor charge; part of the scored-spectrum cache key.</param>
        /// <param name="ms2ScanNum">MS2 scan number to score against.</param>
        /// <returns>
        /// The adjusted score and modification string, or <c>null</c> when the scan is not a product spectrum,
        /// the sequence graph cannot be built, or no proteoform with the requested composition scores above
        /// negative infinity.
        /// </returns>
        public IcBottomUpScores GetScores(char pre, string sequence, char post, AminoAcid nTerm, AminoAcid cTerm, Composition composition, int charge, int ms2ScanNum)
        {
            // Scored spectra are cached per (charge, scan) pair and built on first use.
            ScoredSpectrum scoredSpectrum;
            var cacheKey = GetChargetScanNumPairIndex(charge, ms2ScanNum);

            if (!_scoredSpectra.TryGetValue(cacheKey, out scoredSpectrum))
            {
                var productSpectrum = Run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                if (productSpectrum == null)
                {
                    return null;
                }

                scoredSpectrum = new ScoredSpectrum(productSpectrum, _rankScorer, charge, composition.Mass, Tolerance);
                _scoredSpectra.Add(cacheKey, scoredSpectrum);
            }

            var graph = SequenceGraph.CreateGraph(AminoAcidSet, nTerm, sequence, cTerm);
            if (graph == null)
            {
                return null;
            }

            // Keep the best-scoring proteoform among those whose composition equals the requested one.
            Tuple<double, string> best = null;
            var highestScore = double.NegativeInfinity;
            var numCompositions = graph.GetSequenceCompositions().Length;

            for (var sinkIndex = 0; sinkIndex < numCompositions; sinkIndex++)
            {
                graph.SetSink(sinkIndex);

                if (graph.GetSinkSequenceCompositionWithH2O().Equals(composition))
                {
                    var candidate = graph.GetFragmentScoreAndModifications(scoredSpectrum);
                    if (candidate.Item1 > highestScore)
                    {
                        best         = candidate;
                        highestScore = candidate.Item1;
                    }
                }
            }

            if (best == null)
            {
                return null;
            }

            // TODO: This assumes enzyme is trypsin
            const double probN = 0.99999;
            const double probC = 0.99999;
            const double sumAAProbabilities = 0.1;

            var creditN  = Math.Log(probN / sumAAProbabilities);
            var penaltyN = Math.Log((1.0 - probN) / (1.0 - sumAAProbabilities));
            var creditC  = Math.Log(probC / sumAAProbabilities);
            var penaltyC = Math.Log((1.0 - probC) / (1.0 - sumAAProbabilities));

            var adjustedScore = best.Item1;

            // N-terminus: credited when preceded by K/R or by a protein boundary marker.
            var nTermCredited = pre == 'K' || pre == 'R' || pre == FastaDatabaseConstants.Delimiter || pre == '-';
            adjustedScore += nTermCredited ? creditN : penaltyN;

            // C-terminus: credited when the peptide ends in K/R or is followed by a protein boundary marker.
            var finalResidue  = sequence[sequence.Length - 1];
            var cTermCredited = finalResidue == 'K' || finalResidue == 'R' || post == FastaDatabaseConstants.Delimiter || post == '-';
            adjustedScore += cTermCredited ? creditC : penaltyC;

            return new IcBottomUpScores(adjustedScore, best.Item2);
        }
Example #25
0
        /// <summary>
        /// Builds a sequence graph for every intact protein returned by the indexed FASTA database, computes a
        /// <c>TopDownScorer</c> score for each sequence composition (uncleaved and after each N-terminal cleavage),
        /// and prints the scores followed by protein/composition counts and elapsed times.
        /// </summary>
        /// <remarks>Relies on hard-coded paths for the raw spectrum file and the FASTA database.</remarks>
        public void TestTopDownScoringForAllXics()
        {
            Utils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            // Search parameters (trailing notes are alternative values left by the original author).
            const int    maxNTermCleavages    = 1;    // 30
            const int    minProteinLength     = 7;
            const int    maxProteinLength     = 1000;
            const int    numMaxModsPerProtein = 0;    // 6
            const string dbFilePath           = @"..\..\..\TestFiles\sprot.Ecoli.2012_07.fasta";
            const string specFilePath         = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";
            var          precursorTolerance   = new Tolerance(10);

            // Load the raw file and report how long that took.
            var timer = System.Diagnostics.Stopwatch.StartNew();
            Console.Write("Reading raw file...");
            var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);
            timer.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);

            // Configure amino acid set: three cysteine modifications allowed anywhere in the sequence.
            var searchModifications = new List<SearchModification>
            {
                // NOTE(review): the original held this entry in a local named "dehydro" although it is
                // built from Modification.PyroGluQ - confirm which modification is intended.
                new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false)
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

            var indexedDb = new IndexedDatabase(new FastaDatabase(dbFilePath));

            var  proteinCount     = 0;
            long compositionCount = 0;

            TopDownScorer.MaxCharge = 25;
            TopDownScorer.MinCharge = 8;

            timer.Reset();
            timer.Start();
            Console.WriteLine("Generating XICs...");

            foreach (var entry in indexedDb.IntactSequenceAnnotationsAndOffsets(minProteinLength, maxProteinLength))
            {
                proteinCount++;

                if (proteinCount % 1000 == 0)
                {
                    Console.WriteLine("Processed {0} proteins", proteinCount);
                }

                var graph = SequenceGraph.CreateGraph(aaSet, entry.Annotation);
                if (graph == null)
                {
                    continue;
                }

                // Pass 0 scores the intact sequence; every later pass removes one more N-terminal residue first.
                for (var cleavage = 0; cleavage <= maxNTermCleavages; cleavage++)
                {
                    if (cleavage > 0)
                    {
                        graph.CleaveNTerm();
                    }

                    foreach (var composition in graph.GetSequenceCompositions())
                    {
                        compositionCount++;
                        Console.WriteLine(new TopDownScorer(composition, run, precursorTolerance).GetScore());
                    }
                }
            }

            timer.Stop();
            Console.WriteLine("NumProteins: {0}", proteinCount);
            Console.WriteLine("NumProteinCompositions: {0}", compositionCount);
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
        }
Example #26
0
        /// <summary>
        /// Scores every proteoform of one database sequence, uncleaved and after each N-terminal cleavage,
        /// against its candidate MS2 scans and records the resulting matches.
        /// </summary>
        /// <param name="annotationAndOffset">
        /// Annotation and database offset of the sequence. The first and last annotation characters are used as the
        /// flanking residues, and the sequence is the text between the two-character prefix and suffix.
        /// </param>
        /// <param name="sequenceFilter">Supplies candidate MS2 scans for a sequence mass when <c>ScanNumbers</c> is null.</param>
        /// <param name="matches">Match collection handed to <c>AddMatch</c> together with the scan number.</param>
        /// <param name="maxNumNTermCleavages">Largest number of N-terminal residues to cleave (0 = intact only).</param>
        /// <param name="isDecoy">Stored on each created match.</param>
        /// <param name="cancellationToken">Optional token for the parallel loop; <c>CancellationToken.None</c> when null.</param>
        private void SearchForMatches(AnnotationAndOffset annotationAndOffset,
                                      ISequenceFilter sequenceFilter, SortedSet<DatabaseSequenceSpectrumMatch>[] matches, int maxNumNTermCleavages, bool isDecoy, CancellationToken? cancellationToken = null)
        {
            var pfeOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken      = cancellationToken ?? CancellationToken.None
            };

            var annotation   = annotationAndOffset.Annotation;
            var offset       = annotationAndOffset.Offset;
            var protSequence = annotation.Substring(2, annotation.Length - 4);
            var seqGraph     = SequenceGraph.CreateGraph(AminoAcidSet, AminoAcid.ProteinNTerm, protSequence,
                                                         AminoAcid.ProteinCTerm);

            if (seqGraph == null)
            {
                return;                   // No matches will be found without a sequence graph.
            }

            for (var numNTermCleavages = 0; numNTermCleavages <= maxNumNTermCleavages; numNTermCleavages++)
            {
                // The first pass scores the intact sequence; each later pass cleaves one more residue first.
                if (numNTermCleavages > 0)
                {
                    seqGraph.CleaveNTerm();
                }

                var numProteoforms = seqGraph.GetNumProteoformCompositions();
                var modCombs       = seqGraph.GetModificationCombinations();

                for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
                {
                    seqGraph.SetSink(modIndex);
                    var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
                    var sequenceMass           = protCompositionWithH2O.Mass;

                    if (sequenceMass < MinSequenceMass || sequenceMass > MaxSequenceMass)
                    {
                        continue;
                    }

                    var modCombinations = modCombs[modIndex];

                    // An explicit scan list, when configured, takes precedence over the mass-based filter.
                    var ms2ScanNums = this.ScanNumbers ?? sequenceFilter.GetMatchingMs2ScanNums(sequenceMass);

                    Parallel.ForEach(ms2ScanNums, pfeOptions, ms2ScanNum =>
                    {
                        if (ms2ScanNum > _ms2ScanNums.Last() || ms2ScanNum < _ms2ScanNums.First())
                        {
                            return;
                        }

                        // Validate the isolation window before scoring so that no fragment score is computed
                        // (and then discarded) for a scan that cannot yield a match. The negated comparison
                        // also rejects NaN if the stored value is floating-point.
                        var isoTargetMz = _isolationWindowTargetMz[ms2ScanNum];
                        if (!(isoTargetMz > 0))
                        {
                            return;
                        }

                        var scorer = _ms2ScorerFactory2.GetMs2Scorer(ms2ScanNum);
                        var score  = seqGraph.GetFragmentScore(scorer);
                        var charge = (int)Math.Round(sequenceMass / (isoTargetMz - Constants.Proton));

                        var precursorIon = new Ion(protCompositionWithH2O, charge);
                        var sequence     = protSequence.Substring(numNTermCleavages);

                        // After cleavage the preceding residue is the last residue that was removed.
                        var pre  = numNTermCleavages == 0 ? annotation[0] : annotation[numNTermCleavages + 1];
                        var post = annotation[annotation.Length - 1];
                        var prsm = new DatabaseSequenceSpectrumMatch(sequence, pre, post, ms2ScanNum, offset, numNTermCleavages,
                                                                     modCombinations, precursorIon, score, isDecoy);

                        AddMatch(matches, ms2ScanNum, prsm);
                    });
                }
            }
        }
Example #27
0
        /// <summary>
        /// Builds a sequence graph for one hard-coded protein annotation and, for each sequence composition,
        /// prints the composition, its most abundant isotope index, the m/z of that isotope at charge 11,
        /// the <c>TopDownScorer</c> score, and a charge/m-z table for charges 9 through 18.
        /// </summary>
        /// <remarks>Relies on a hard-coded path for the raw spectrum file.</remarks>
        public void TestTopDownScoring()
        {
            Utils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            TopDownScorer.MaxCharge = 25;
            TopDownScorer.MinCharge = 8;

            const string specFilePath   = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";
            const string protAnnotation = "A.AHAHLTHQYPAANAQVTAAPQAITLNFSEGVETGFSGAKITGPKNENIKTLPAKRNEQDQKQLIVPLADSLKPGTYTVDWHVVSVDGHKTKGHYTFSVK.";

            // Three cysteine modifications allowed anywhere in the sequence.
            var searchModifications = new List<SearchModification>
            {
                // NOTE(review): the original held this entry in a local named "dehydro" although it is
                // built from Modification.PyroGluQ - confirm which modification is intended.
                new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false),
                new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false)
            };
            var aaSet              = new AminoAcidSet(searchModifications, 0);
            var precursorTolerance = new Tolerance(10);

            // Create the sequence graph first, then load the spectra.
            var graph = SequenceGraph.CreateGraph(aaSet, protAnnotation);
            var run   = InMemoryLcMsRun.GetLcMsRun(specFilePath);

            foreach (var composition in graph.GetSequenceCompositions())
            {
                var topIsotopeIndex = composition.GetMostAbundantIsotopeZeroBasedIndex();

                Console.WriteLine("Composition\t{0}", composition);
                Console.WriteLine("MostAbundantIsotopeIndex\t{0}", topIsotopeIndex);

                // m/z of the most abundant isotope for the water-added composition at charge 11.
                var ionAtCharge11 = new Ion(composition + Composition.H2O, 11);
                Console.WriteLine(ionAtCharge11.GetIsotopeMz(topIsotopeIndex));
                Console.WriteLine();

                Console.WriteLine(new TopDownScorer(composition, run, precursorTolerance).GetScore());

                // Charge / m-z table for the same isotope.
                Console.WriteLine("\nCharge\tm/z");

                for (var z = 9; z <= 18; z++)
                {
                    var ion = new Ion(composition + Composition.H2O, z);
                    Console.WriteLine("{0}\t{1}", z, ion.GetIsotopeMz(topIsotopeIndex));
                }
            }
        }
Example #28
0
        /// <summary>
        /// Reads an MSAlign+ result table, scores the annotation in each data row with <c>TopDownScorer</c>,
        /// and writes a copy of the table (input path + ".txt") with an extra "Score" column.
        /// </summary>
        /// <remarks>
        /// Rows whose annotation cannot be turned into a sequence graph, and rows with too few columns to
        /// contain an annotation, receive "N/A" instead of a score. Relies on hard-coded local file paths.
        /// </remarks>
        public void TestMsAlignPlusResults()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            TopDownScorer.MaxCharge = 25;
            TopDownScorer.MinCharge = 8;

            const string specFilePath          = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";
            const string msAlignPlusResultPath = @"C:\workspace\TopDown\E_coli_iscU_60_mock_MSAlign_ResultTable_sam.txt";

            // Zero-based index of the tab-separated column that holds the annotation.
            const int annotationColumnIndex = 13;

            var dehydro      = new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false);
            var cysteinylC   = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
            var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

            var searchModifications = new List<SearchModification>
            {
                dehydro,
                cysteinylC,
                glutathioneC
            };
            var aaSet = new AminoAcidSet(searchModifications, 0);
            var precursorTolerance = new Tolerance(10);
            var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

            // 'using' guarantees both files are closed (and the writer flushed) even if scoring throws.
            using (var writer = new StreamWriter(msAlignPlusResultPath + ".txt"))
            using (var reader = new StreamReader(msAlignPlusResultPath))
            {
                string s;

                while ((s = reader.ReadLine()) != null)
                {
                    // Header row: copy it through with the name of the new column appended.
                    if (s.StartsWith("Data_file_name\t", StringComparison.Ordinal))
                    {
                        writer.WriteLine(s + "\tScore");
                        continue;
                    }

                    var token = s.Split('\t');

                    // Blank or truncated rows have no annotation column; report them as unscored
                    // rather than failing with an index-out-of-range error.
                    if (token.Length <= annotationColumnIndex)
                    {
                        writer.WriteLine(s + "\tN/A");
                        continue;
                    }

                    var annotation = token[annotationColumnIndex];
                    var seqGraph   = SequenceGraph.CreateGraph(aaSet, annotation);
                    if (seqGraph == null)
                    {
                        writer.WriteLine(s + "\tN/A");
                        continue;
                    }

                    var protCompositions = seqGraph.GetSequenceCompositions();

                    // Only the first composition returned by the graph is scored.
                    var scorer = new TopDownScorer(protCompositions[0], run, precursorTolerance);
                    var score  = scorer.GetScore();

                    writer.WriteLine(s + "\t" + score);
                    Console.WriteLine(score);
                }
            }
        }
Example #29
0
        /// <summary>
        /// Scores every proteoform of every annotated sequence against the MS2 scans selected by
        /// <paramref name="ms1Filter"/> and collects the best matches per scan.
        /// </summary>
        /// <param name="annotationsAndOffsets">Sequences to search, each with its annotation and database offset.</param>
        /// <param name="ms1Filter">Returns the MS2 scan numbers to try for a given sequence mass.</param>
        /// <param name="isDecoy">Stored on each created match.</param>
        /// <returns>
        /// An array indexed by MS2 scan number (length <c>_run.MaxLcScan + 1</c>). An entry is null when the scan
        /// has no match; otherwise it holds that scan's retained matches, which stop growing once
        /// <c>NumMatchesPerSpectrum</c> are present.
        /// </returns>
        private SortedSet<DatabaseSequenceSpectrumMatch>[] RunSearch(IEnumerable<AnnotationAndOffset> annotationsAndOffsets, ISequenceFilter ms1Filter, bool isDecoy)
        {
            var sw          = Stopwatch.StartNew();
            var numPeptides = 0;

            var matches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];

            // TODO: N-term Met cleavage
            foreach (var annotationAndOffset in annotationsAndOffsets)
            {
                ++numPeptides;

                var annotation = annotationAndOffset.Annotation;
                var offset     = annotationAndOffset.Offset;

                if (numPeptides % 100000 == 0)
                {
                    // The counter is a positive multiple of 100000 here, so the ordinal suffix is always "th".
                    Console.Write(@"Processing {0}th peptides...", numPeptides);

                    // Report the time spent on the last batch, then restart the timer for the next one.
                    sw.Stop();
                    var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);
                    sw.Reset();
                    sw.Start();
                }

                var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, annotation);
                if (seqGraph == null)
                {
                    // No graph could be built for this annotation; skip it.
                    continue;
                }

                var numProteoforms = seqGraph.GetNumProteoformCompositions();
                var modCombs       = seqGraph.GetModificationCombinations();

                for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
                {
                    seqGraph.SetSink(modIndex);
                    var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
                    var sequenceMass           = protCompositionWithH2O.Mass;
                    var modCombinations        = modCombs[modIndex];

                    foreach (var ms2ScanNum in ms1Filter.GetMatchingMs2ScanNums(sequenceMass))
                    {
                        var spec = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                        if (spec == null)
                        {
                            continue;
                        }

                        var charge =
                            (int)Math.Round(sequenceMass / (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));
                        var scorer = _ms2ScorerFactory.GetMs2Scorer(ms2ScanNum);
                        var score  = seqGraph.GetFragmentScore(scorer);

                        // Scores of 2 or less are not recorded.
                        if (score <= 2)
                        {
                            continue;
                        }

                        var precursorIon = new Ion(protCompositionWithH2O, charge);
                        var sequence     = annotation.Substring(2, annotation.Length - 4);
                        var pre          = annotation[0];
                        var post         = annotation[annotation.Length - 1];
                        var prsm         = new DatabaseSequenceSpectrumMatch(sequence, pre, post, ms2ScanNum, offset, 0, modCombinations,
                                                                             precursorIon, score, isDecoy);

                        var existingMatches = matches[ms2ScanNum];
                        if (existingMatches == null)
                        {
                            matches[ms2ScanNum] = new SortedSet<DatabaseSequenceSpectrumMatch> { prsm };
                        }
                        else if (existingMatches.Count < NumMatchesPerSpectrum)
                        {
                            existingMatches.Add(prsm);
                        }
                        else if (score > existingMatches.Min.Score && existingMatches.Add(prsm))
                        {
                            // Evict the current minimum only when the new match was really inserted.
                            // SortedSet.Add returns false for an element that compares equal to an existing
                            // one, and evicting in that case would shrink the set below its limit.
                            // NOTE(review): assumes the set's ordering makes Min the lowest-scoring match - confirm.
                            existingMatches.Remove(existingMatches.Min);
                        }
                    }
                }
            }

            return matches;
        }