/// <summary>
/// Prints the most abundant isotope m/z of a fixed protein sequence for charges 22 through 60,
/// then prints the isotope m/z values and ratios returned by <c>GetIsotopes(0.1)</c> for the
/// charge-29 ion.
/// </summary>
/// <remarks>Returns without printing anything when the sequence graph cannot be created.</remarks>
public void GetIsoProfile()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string protSequence = "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";

    // Range of charge states listed in the first table.
    const int minCharge = 22;
    const int maxCharge = 60;

    // Charge state whose isotope profile is printed in detail.
    // NOTE(review): the original local was named "ion27" although it used charge 29;
    // the value 29 is kept here, confirm it is the intended charge.
    const int profileCharge = 29;

    var seqGraph = SequenceGraph.CreateGraph(new AminoAcidSet(), AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (seqGraph == null)
    {
        return;
    }

    seqGraph.SetSink(0);
    var neutral = seqGraph.GetSinkSequenceCompositionWithH2O() - Composition.Hydrogen;

    for (var charge = minCharge; charge <= maxCharge; charge++)
    {
        var ion = new Ion(neutral, charge);
        Console.WriteLine("{0}\t{1}", charge, ion.GetMostAbundantIsotopeMz());
    }

    var profileIon = new Ion(neutral, profileCharge);
    foreach (var isotope in profileIon.GetIsotopes(0.1))
    {
        Console.WriteLine("{0}\t{1}", profileIon.GetIsotopeMz(isotope.Index), isotope.Ratio);
    }
}
/// <summary>
/// Builds a sequence graph for "_.MARTKQTARK._" using K methylation and M oxidation search
/// modifications (maximum of two per protein) and prints each sequence composition with its mass.
/// </summary>
public void TestBuildingReverseGraph()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    const string annotation = "_.MARTKQTARK._";
    const int numMaxModsPerProtein = 2;

    // Amino acid set: methylation on K and oxidation on M, anywhere in the sequence.
    var searchModifications = new List<SearchModification>
    {
        new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false)
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    var seqGraph = SequenceGraph.CreateGraph(aaSet, annotation);
    var sequenceCompositions = seqGraph.GetSequenceCompositions();
    foreach (var sequenceComposition in sequenceCompositions)
    {
        Console.WriteLine("{0}\t{1}", sequenceComposition, sequenceComposition.Mass);
    }
}
/// <summary>
/// Builds a sequence graph for a long protein annotation with a protein-N-terminal PyroGluQ
/// search modification and, for every sequence composition, prints the first five and last
/// five fragment compositions (with a " ..." marker standing in for the omitted middle).
/// </summary>
public void TestBuildingSequenceGraphLongProtein()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    // Number of fragment compositions shown at each end of the list.
    const int numShownAtEachEnd = 5;

    // Configure amino acid set
    const int numMaxModsPerProtein = 6;
    var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);
    // Fixed copy-paste error: this entry previously used Modification.PyroGluQ.
    var dehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

    // Only pyroGluQ is active; the remaining modifications are optional toggles.
    var searchModifications = new List<SearchModification>
    {
        pyroGluQ,
        //dehydro,
        //cysteinylC,
        //glutathioneC,
        //oxM
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    const string protAnnotation = "_.MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAALRKLMMNHQ._";

    var seqGraph = SequenceGraph.CreateGraph(aaSet, protAnnotation);
    var seqCompositions = seqGraph.GetSequenceCompositions();

    for (var modIndex = 0; modIndex < seqCompositions.Length; modIndex++)
    {
        var seqComposition = seqCompositions[modIndex];
        Console.WriteLine("SequenceComposition: {0}", seqComposition);

        var compositions = seqGraph.GetFragmentCompositions(modIndex, 0).ToList();
        var compIndex = 0;
        foreach (var composition in compositions)
        {
            if (compIndex < numShownAtEachEnd || compIndex >= compositions.Count - numShownAtEachEnd)
            {
                Console.WriteLine(" Seq: {0}, Frag: {1}", seqComposition, composition);
            }
            else if (compIndex == numShownAtEachEnd)
            {
                // Printed once, at the first omitted entry.
                Console.WriteLine(" ...");
            }

            compIndex++;
        }
    }
}
/// <summary>
/// Sums observed isotope peak intensities of a fixed protein over charges 22 through 45 in a
/// summed MS1 spectrum (scans 46437 to 46466), prints the theoretical profile next to the
/// max-normalized observed profile, and prints their Pearson correlation.
/// </summary>
/// <remarks>
/// The test is ignored when <c>TestRawFilePath</c> does not exist and returns silently when the
/// sequence graph cannot be created.
/// </remarks>
public void TestSumIsoProfilesAcrossDifferentCharges()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath) as PbfLcMsRun;
    // The "as" cast yields null when the reader is not a PbfLcMsRun; fail clearly instead of
    // hitting a NullReferenceException on the next line.
    Assert.True(run != null);

    var spec = run.GetSummedMs1Spectrum(46437, 46466);
    var tolerance = new Tolerance(10);

    const string protSequence = "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";

    var seqGraph = SequenceGraph.CreateGraph(new AminoAcidSet(), AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (seqGraph == null)
    {
        return;
    }

    seqGraph.SetSink(0);
    var neutral = seqGraph.GetSinkSequenceCompositionWithH2O();
    var theoProfile = neutral.GetIsotopomerEnvelopeRelativeIntensities();
    var expProfile = new double[theoProfile.Length];

    for (var charge = 22; charge <= 45; charge++)
    {
        var ion = new Ion(neutral, charge);
        var isotopePeaks = spec.GetAllIsotopePeaks(ion, tolerance, 0.1);
        if (isotopePeaks == null)
        {
            continue;
        }

        Assert.AreEqual(theoProfile.Length, isotopePeaks.Length);

        for (var i = 0; i < isotopePeaks.Length; i++)
        {
            // Individual isotope peaks may be missing (null); they contribute nothing.
            if (isotopePeaks[i] != null)
            {
                expProfile[i] += isotopePeaks[i].Intensity;
            }
        }
    }

    // Hoisted out of the print loop: the maximum does not change while printing.
    var maxObservedIntensity = expProfile.Max();
    for (var i = 0; i < theoProfile.Length; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}", neutral.GetIsotopeMass(i), theoProfile[i], expProfile[i] / maxObservedIntensity);
    }

    Console.WriteLine("Corr: " + FitScoreCalculator.GetPearsonCorrelation(theoProfile, expProfile));
}
/// <summary>
/// Finds the best-scoring modification assignment of <paramref name="seqStr"/> whose
/// composition (including H2O) equals <paramref name="composition"/>, then computes composite
/// scores for the resulting sequence.
/// </summary>
/// <param name="spec">Product spectrum to score against.</param>
/// <param name="seqStr">Unmodified sequence string.</param>
/// <param name="composition">Composition that a modification combination must match.</param>
/// <param name="charge">Precursor charge; also caps the maximum product charge used for scoring.</param>
/// <param name="ms2ScanNum">MS2 scan number passed on to <c>GetCompositeScores</c>.</param>
/// <returns>
/// The scores for the best assignment, or <c>null</c> when <paramref name="spec"/> is null,
/// the sequence graph cannot be created, or no matching combination yields a usable score.
/// </returns>
public IcScores GetScores(ProductSpectrum spec, string seqStr, Composition composition, int charge, int ms2ScanNum)
{
    if (spec == null)
    {
        return null;
    }

    var scorer = new CompositeScorer(spec, Tolerance, MinProductCharge, Math.Min(MaxProductCharge, charge), activationMethod: ActivationMethod);
    var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, AminoAcid.ProteinNTerm, seqStr, AminoAcid.ProteinCTerm);
    if (seqGraph == null)
    {
        return null;
    }

    var bestScore = double.NegativeInfinity;
    Tuple<double, string> bestScoreAndModifications = null;
    var protCompositions = seqGraph.GetSequenceCompositions();

    for (var modIndex = 0; modIndex < protCompositions.Length; modIndex++)
    {
        seqGraph.SetSink(modIndex);
        var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
        if (!protCompositionWithH2O.Equals(composition))
        {
            continue;
        }

        var curScoreAndModifications = seqGraph.GetFragmentScoreAndModifications(scorer);
        if (curScoreAndModifications == null)
        {
            // Other callers treat this result as nullable; skip instead of dereferencing null.
            continue;
        }

        var curScore = curScoreAndModifications.Item1;
        // Written as a negated ">" so that a NaN score is skipped as well.
        if (!(curScore > bestScore))
        {
            continue;
        }

        bestScoreAndModifications = curScoreAndModifications;
        bestScore = curScore;
    }

    if (bestScoreAndModifications == null)
    {
        return null;
    }

    var modifications = bestScoreAndModifications.Item2;
    var seqObj = Sequence.CreateSequence(seqStr, modifications, AminoAcidSet);

    double score;
    int nMatchedFragments;
    GetCompositeScores(seqObj, charge, ms2ScanNum, out score, out nMatchedFragments);

    return new IcScores(nMatchedFragments, score, modifications);
}
/// <summary>
/// Builds a sequence graph for "_.MARTKQTARK._" with K methylation and M oxidation (maximum of
/// four modifications) and prints the number of proteoform sequences, first per modification
/// combination and then per number of modifications.
/// </summary>
public void TestNumberOfProteoforms()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string annotation = "_.MARTKQTARK._";

    // Configure amino acid set
    var methylK = new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false);
    var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var carbamidomethylC = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);
    var methylC = new SearchModification(Modification.Methylation, 'C', SequenceLocation.Everywhere, true);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.PeptideNTerm, false);

    // Only methylK and oxM are active; the other entries are optional toggles.
    var searchModifications = new List<SearchModification>
    {
        //carbamidomethylC,
        //methylC,
        methylK,
        //pyroGluQ,
        oxM,
        //acetylN
    };

    const int numMaxModsPerProtein = 4;
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    var seqGraph = SequenceGraph.CreateGraph(aaSet, annotation);
    var protCompositions = seqGraph.GetSequenceCompositions();
    var modCombs = seqGraph.GetModificationCombinations();

    // Spelling of the three labels below was corrected.
    Console.WriteLine("\n#Proteoforms by mod combinations: ");
    for (var modIndex = 0; modIndex < protCompositions.Length; modIndex++)
    {
        // Index 0 is reported as the unmodified form.
        Console.Write((modIndex == 0) ? "No modifications" : modCombs[modIndex].ToString());
        Console.Write("\t");
        Console.WriteLine("{0}", seqGraph.GetNumProteoformSequences(modIndex));
    }

    Console.WriteLine("\n#Proteoforms by number of modifications: ");
    for (var nMod = 0; nMod <= numMaxModsPerProtein; nMod++)
    {
        Console.Write("#modifications = {0}", nMod);
        Console.Write("\t");
        Console.WriteLine("{0}", seqGraph.GetNumProteoformSequencesByNumMods(nMod));
    }
}
/// <summary>
/// Builds a sequence graph for a histone H4 annotation with ten search modifications
/// (maximum of eleven per protein) and prints the composition counts reported by the graph.
/// </summary>
public void TestCreatingHistoneGraph()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    const int numMaxModsPerProtein = 11;

    // Histone H4
    const string annotation = "_.MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG._";

    // Modifications are listed in the same order as the original configuration.
    var searchModifications = new List<SearchModification>
    {
        new SearchModification(Modification.Acetylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Acetylation, 'K', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Methylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.DiMethylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.DiMethylation, 'K', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.TriMethylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false)
    };

    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var graph = SequenceGraph.CreateGraph(aaSet, annotation);

    var fragmentCompositionCount = graph.GetNumFragmentCompositions();
    var proteoformCount = graph.GetNumProteoformCompositions();
    // NOTE(review): this reads the same method as proteoformCount, so both lines print the
    // same source value; confirm whether a different count was intended here.
    var sequenceCompositionCount = graph.GetNumProteoformCompositions();

    Console.WriteLine("NumFragmentCompositions: " + fragmentCompositionCount);
    Console.WriteLine("NumProteoforms: " + proteoformCount);
    Console.WriteLine("NumSequenceCompositions: " + sequenceCompositionCount);
}
/// <summary>
/// Builds a sequence graph for "_.MARTKQTARK._" with K methylation and M oxidation (maximum of
/// two modifications). For each sequence composition it prints the composition and mass and,
/// when available, the score and modification string obtained with a <c>DummyScorer</c>.
/// </summary>
public void TestGettingSequence()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    const string annotation = "_.MARTKQTARK._";
    const int numMaxModsPerProtein = 2;

    // Candidate modifications; only methylK and oxM are placed in the search list.
    var methylK = new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false);
    var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var carbamidomethylC = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);
    var methylC = new SearchModification(Modification.Methylation, 'C', SequenceLocation.Everywhere, true);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.PeptideNTerm, false);

    var searchModifications = new List<SearchModification> { methylK, oxM };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    var seqGraph = SequenceGraph.CreateGraph(aaSet, annotation);
    var protCompositions = seqGraph.GetSequenceCompositions();

    for (var sinkIndex = 0; sinkIndex < protCompositions.Length; sinkIndex++)
    {
        seqGraph.SetSink(sinkIndex);

        var sinkComposition = protCompositions[sinkIndex];
        Console.WriteLine("{0}\t{1}", sinkComposition, sinkComposition.Mass);

        var scoreAndModifications = seqGraph.GetFragmentScoreAndModifications(new DummyScorer());
        if (scoreAndModifications == null)
        {
            // Nothing to report for this sink.
            continue;
        }

        Console.WriteLine("Score: {0}, Modifications: {1}", scoreAndModifications.Item1, scoreAndModifications.Item2);
    }
}
/// <summary>
/// Finds the best-scoring modification assignment of <paramref name="seqStr"/> whose
/// composition (including H2O) equals <paramref name="composition"/>, then asks
/// <paramref name="informedScorer"/> for the matched-fragment count and user-visible score of
/// the resulting sequence.
/// </summary>
/// <param name="informedScorer">Scorer used for the final, reported scores.</param>
/// <param name="scorer">Scorer used to rank modification assignments on the graph.</param>
/// <param name="seqStr">Unmodified sequence string.</param>
/// <param name="composition">Composition that a modification combination must match.</param>
/// <returns>
/// The scores for the best assignment, or <c>null</c> when the sequence graph cannot be
/// created or no matching combination yields a usable score.
/// </returns>
public IcScores GetIcScores(IInformedScorer informedScorer, IScorer scorer, string seqStr, Composition composition)
{
    var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, AminoAcid.ProteinNTerm, seqStr, AminoAcid.ProteinCTerm);
    if (seqGraph == null)
    {
        return null;
    }

    var bestScore = double.NegativeInfinity;
    Tuple<double, string> bestScoreAndModifications = null;
    var protCompositions = seqGraph.GetSequenceCompositions();

    for (var modIndex = 0; modIndex < protCompositions.Length; modIndex++)
    {
        seqGraph.SetSink(modIndex);
        var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
        if (!protCompositionWithH2O.Equals(composition))
        {
            continue;
        }

        var curScoreAndModifications = seqGraph.GetFragmentScoreAndModifications(scorer);
        if (curScoreAndModifications == null)
        {
            // Other callers treat this result as nullable; skip instead of dereferencing null.
            continue;
        }

        var curScore = curScoreAndModifications.Item1;
        // Written as a negated ">" so that a NaN score is skipped as well.
        if (!(curScore > bestScore))
        {
            continue;
        }

        bestScoreAndModifications = curScoreAndModifications;
        bestScore = curScore;
    }

    if (bestScoreAndModifications == null)
    {
        return null;
    }

    var modifications = bestScoreAndModifications.Item2;
    var sequence = Sequence.CreateSequence(seqStr, modifications, this.AminoAcidSet);

    var numMatchedFragments = informedScorer.GetNumMatchedFragments(sequence);
    var score = informedScorer.GetUserVisibleScore(sequence);

    return new IcScores(numMatchedFragments, score, modifications);
}
/// <summary>
/// Builds a sequence graph with four cysteine modifications plus M oxidation (maximum of four
/// modifications) and prints every sequence composition with its modification combination,
/// once before and once after calling <c>CleaveNTerm</c>.
/// </summary>
public void TestGraphWithModifications()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    const string annotation = "_.MIALNKTPQTIVFYKPYGVLCQFTDNSAHPRPTLKDYINLPDLYPVGRLDQDSEGLLLLTSNGKLQHRLAHREFAHQRTYFAQVEGSPTDEDLEPLRRGITFADYPTRPAIAKIITEPDFPPRNPPIRYRASIPTSWLSITLTEGRNRQVRRMTAAVGFPTLRLVRVQIQVTGRSPQQGKGKSAATWCLTLEGLSPGQWRPLTPWEENFCQQLLTGNPNGPWQKKFGDRR._";
    const int numMaxModsPerProtein = 4;

    // Constructed in the original order; the list order below differs (oxM goes last).
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var nitrosylC = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
    var nethylmaleimideC = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        nitrosylC,
        nethylmaleimideC,
        oxM
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var seqGraph = SequenceGraph.CreateGraph(aaSet, annotation);

    // State of the graph before N-terminal cleavage.
    var compositionsBefore = seqGraph.GetSequenceCompositions();
    var modCombsBefore = seqGraph.GetModificationCombinations();
    Console.WriteLine("*** Before cleavage: {0}", compositionsBefore.Length);
    for (var i = 0; i < compositionsBefore.Length; i++)
    {
        Console.WriteLine("SequenceComposition: {0}, ModComb: {1}", compositionsBefore[i], modCombsBefore[i]);
    }

    seqGraph.CleaveNTerm();

    // State of the graph after N-terminal cleavage.
    var compositionsAfter = seqGraph.GetSequenceCompositions();
    var modCombsAfter = seqGraph.GetModificationCombinations();
    Console.WriteLine("*** After cleavage: {0}", compositionsAfter.Length);
    for (var i = 0; i < compositionsAfter.Length; i++)
    {
        Console.WriteLine("SequenceComposition: {0}, ModComb: {1}", compositionsAfter[i], modCombsAfter[i]);
    }
}
/// <summary>
/// Builds a sequence graph for "_.STR._" with S/T/Y phosphorylation, M oxidation and a fixed
/// C carbamidomethylation (maximum of two modifications), verifies that the graph's unmodified
/// sequence composition equals the composition computed by the amino acid set, and prints all
/// sequence compositions with their index.
/// </summary>
public void TestSequenceGraph()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    ShowStarting(methodName);

    var phosPhoS = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
    var phosPhoT = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
    var phosPhoY = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var fixCarbamidomethylC = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);

    var searchModifications = new List<SearchModification> { phosPhoS, phosPhoT, phosPhoY, oxM, fixCarbamidomethylC };

    const int numMaxModsPepPeptide = 2;
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPepPeptide);

    const string annotation = "_.STR._";
    // Strip the two-character flanks ("_." and "._") to obtain the bare peptide sequence.
    var pepSeq = annotation.Substring(2, annotation.Length - 4);

    var expectedComposition = aaSet.GetComposition(pepSeq);
    Console.WriteLine(expectedComposition);

    var graph = SequenceGraph.CreateGraph(aaSet, annotation);
    var actualComposition = graph.GetUnmodifiedSequenceComposition();
    Console.WriteLine(actualComposition);

    // Expected value first, value under test second, so failure messages read correctly.
    Assert.AreEqual(expectedComposition, actualComposition);

    Console.WriteLine("Annotation Compositions:");
    var index = 0;
    foreach (var composition in graph.GetSequenceCompositions())
    {
        Console.WriteLine(index + ": " + composition);
        index++;
    }
}
/// <summary>
/// Scores the peptide "R.LENWPPASLADDL._" against MS2 scan 25534 of a local raw file, first
/// with the graph-based fragment score and then with <c>InformedBottomUpScorer</c>, and prints
/// both results. The test is ignored when the raw file is not present.
/// </summary>
public void TestPsm()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    // Peptide-spectrum match under test.
    const char pre = 'R';
    const string sequence = "LENWPPASLADDL";
    const char post = 'A';
    const string annotation = "R.LENWPPASLADDL._";
    const int charge = 2;
    const int ms2ScanNum = 25534;

    var aaSet = new AminoAcidSet();
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 0, 0);

    // Fast search: score the sequence graph against the deconvoluted MS2 spectrum.
    var productScorer = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 2, 10, 0, 1.1);
    productScorer.DeconvoluteAllProductSpectra();
    var scanScorer = productScorer.GetMs2Scorer(ms2ScanNum);

    var graph = SequenceGraph.CreateGraph(aaSet, annotation);
    graph.SetSink(0);
    var fastScore = graph.GetFragmentScore(scanScorer);
    Console.WriteLine("Fast search score: " + fastScore);

    // Refined scoring with the informed bottom-up scorer.
    var composition = graph.GetSinkSequenceCompositionWithH2O();
    var informedScorer = new InformedBottomUpScorer(run, aaSet, 1, 15, new Tolerance(10));
    var refinedScore = informedScorer.GetScores(pre, sequence, post, composition, charge, ms2ScanNum);
    Console.WriteLine("RefinedScores: {0}", refinedScore);
}
/// <summary>
/// Builds a sequence graph for "_.QARTKQTARK._" with protein-N-terminal acetylation and
/// PyroGluQ (maximum of two modifications) and prints all sequence compositions with their
/// masses, before and after calling <c>CleaveNTerm</c>.
/// </summary>
public void TestNTermMods()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    const string annotation = "_.QARTKQTARK._";
    const int numMaxModsPerProtein = 2;

    // Candidate modifications; oxM is constructed but not added to the search list.
    var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification> { acetylN, pyroGluQ };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    var seqGraph = SequenceGraph.CreateGraph(aaSet, annotation);

    var compositionsBefore = seqGraph.GetSequenceCompositions();
    foreach (var before in compositionsBefore)
    {
        Console.WriteLine("{0}\t{1}", before, before.Mass);
    }

    Console.WriteLine("*** Cleave N-term");
    seqGraph.CleaveNTerm();

    var compositionsAfter = seqGraph.GetSequenceCompositions();
    foreach (var after in compositionsAfter)
    {
        Console.WriteLine("{0}\t{1}", after, after.Mass);
    }
}
/// <summary>
/// Builds the charge-43 ion of a fixed protein, sums MS1 spectra around MS2 scan 46562 via
/// <c>GetSummedMs1Spectrum(scan, 2.5)</c>, and prints the correlation score of the ion in the
/// summed spectrum.
/// </summary>
/// <remarks>
/// The test is ignored when <c>TestRawFilePath</c> does not exist and returns silently when the
/// sequence graph cannot be created.
/// </remarks>
public void TestSmartIsoWindowSumming()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
    }

    const string protSequence = "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";

    var seqGraph = SequenceGraph.CreateGraph(new AminoAcidSet(), AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (seqGraph == null)
    {
        return;
    }

    seqGraph.SetSink(0);
    var neutral = seqGraph.GetSinkSequenceCompositionWithH2O();
    var ion = new Ion(neutral, 43);
    var tolerance = new Tolerance(10);

    const int targetMs2ScanNum = 46562;

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath) as PbfLcMsRun;
    // The "as" cast yields null when the reader is not a PbfLcMsRun; fail clearly instead of
    // hitting a NullReferenceException on the next line.
    Assert.True(run != null);

    // The target scan must be a product (MS2) spectrum.
    var ms2Spec = run.GetSpectrum(targetMs2ScanNum) as ProductSpectrum;
    Assert.True(ms2Spec != null);

    var summedSpec = run.GetSummedMs1Spectrum(targetMs2ScanNum, 2.5);
    Console.WriteLine("Corr: " + summedSpec.GetCorrScore(ion, tolerance));
}
/// <summary>
/// Reads an indexed FASTA database, enumerates peptide annotations via
/// <c>AnnotationsAndOffsets(7, 40, 2, 2, Enzyme.Trypsin)</c>, creates a sequence graph for each
/// one in parallel, and prints the elapsed time in seconds.
/// </summary>
private static void TestCountingPeptides()
{
    const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_003456_9B916A8B.fasta";

    var aaSet = new AminoAcidSet();

    var sw = new Stopwatch();
    sw.Start();

    var indexedDb = new IndexedDatabase(new FastaDatabase(dbFile));
    indexedDb.Read();

    var peptides = indexedDb.AnnotationsAndOffsets(7, 40, 2, 2, Enzyme.Trypsin);
    Parallel.ForEach(peptides, entry =>
    {
        // Both values are read as in the original; only the annotation feeds the graph,
        // and the graph itself is discarded.
        var annotation = entry.Annotation;
        var offset = entry.Offset;
        var graph = SequenceGraph.CreateGraph(aaSet, annotation);
    });

    sw.Stop();

    var elapsedSeconds = sw.ElapsedTicks / (double)Stopwatch.Frequency;
    Console.WriteLine(@"{0:f4} sec", elapsedSeconds);
}
/// <summary>
/// Writes a tab-separated table (Annotation, Charge, ScanNum) listing, for every annotation
/// from <c>PeptideEnumerator</c>, every sequence composition of its graph and every precursor
/// charge from <c>MinCharge</c> to <c>MaxCharge</c>, the fragmentation scan numbers the run
/// reports for that precursor ion.
/// </summary>
/// <param name="outputFilePath">Path of the file to create or overwrite.</param>
public void ScorePeptides(string outputFilePath)
{
    using (var writer = new StreamWriter(outputFilePath))
    {
        writer.WriteLine("Annotation\tCharge\tScanNum");

        foreach (var annotation in PeptideEnumerator)
        {
            // annotation: pre + "." + peptide + "." + post (e.g. R.PEPTIDER.G)
            var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, annotation);
            if (seqGraph == null)
            {
                // Other callers handle a null graph from CreateGraph; skip this annotation
                // rather than aborting the whole export with a NullReferenceException.
                continue;
            }

            foreach (var sequenceComposition in seqGraph.GetSequenceCompositions())
            {
                var peptideComposition = sequenceComposition + Composition.H2O;

                for (var precursorCharge = MinCharge; precursorCharge <= MaxCharge; precursorCharge++)
                {
                    var precursorIon = new Ion(peptideComposition, precursorCharge);
                    foreach (var scanNum in Run.GetFragmentationSpectraScanNums(precursorIon))
                    {
                        writer.WriteLine("{0}\t{1}\t{2}", annotation, precursorCharge, scanNum);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Detects features for the current peptide: first on the monoisotopic precursor XIC, then on
/// a fixed set of precursor isotope targets, and finally on every product ion. Progress is
/// reported through the background worker and property-changed notifications are raised at
/// the end.
/// </summary>
private void FindFeatures()
{
    m_FeatureFinderBackgroundWorker.ReportProgress(0, "Finding 3-D Features for Precursor and Fragments");

    // NOTE(review): the graph is created but never read in this method; kept in case
    // CreateGraph is relied on for side effects. Confirm and remove if not needed.
    var seqGraph = SequenceGraph.CreateGraph(m_aminoAcidSet, CurrentPeptide);

    var sequence = new Sequence(CurrentPeptide, m_aminoAcidSet);
    var precursorIon = sequence.GetPrecursorIon(CurrentChargeState);
    var monoMz = precursorIon.GetMonoIsotopicMz();

    // Precursor features from the smoothed watershed map of the monoisotopic XIC.
    var uimfPointList = UimfUtil.GetXic(monoMz, CurrentTolerance, UIMFData.FrameType.MS1, DataReader.ToleranceType.PPM);
    var watershedPointList = WaterShedMapUtil.BuildWatershedMap(uimfPointList);
    var smoother = new SavitzkyGolaySmoother(11, 2);
    smoother.Smooth(ref watershedPointList);
    FeatureList = FeatureDetection.DoWatershedAlgorithm(watershedPointList).ToList();

    // Isotope features; charge state 2 additionally probes the half-integer offsets.
    IsotopeFeaturesDictionary.Clear();
    var precursorTargetList = CurrentChargeState == 2
        ? new List<string> { "-1", "0.5", "1", "1.5", "2", "3" }
        : new List<string> { "-1", "1", "2", "3" };

    foreach (var precursorTarget in precursorTargetList)
    {
        // The targets are fixed literals using '.' as the decimal separator, so they must be
        // parsed with the invariant culture; the current culture may treat '.' differently.
        var isotopeOffset = double.Parse(precursorTarget, System.Globalization.CultureInfo.InvariantCulture);
        var targetMz = precursorIon.GetIsotopeMz(isotopeOffset);

        var isotopeUimfPointList = UimfUtil.GetXic(targetMz, CurrentTolerance, UIMFData.FrameType.MS1, DataReader.ToleranceType.PPM);
        var isotopeWatershedPointList = WaterShedMapUtil.BuildWatershedMap(isotopeUimfPointList);
        var isotopeFeatures = FeatureDetection.DoWatershedAlgorithm(isotopeWatershedPointList).ToList();
        IsotopeFeaturesDictionary.Add(precursorTarget, isotopeFeatures);
    }

    LcSlicePlot = new PlotModel();
    ImsSlicePlot = new PlotModel();

    // Fragment features, one entry per product ion.
    FragmentFeaturesDictionary.Clear();
    var ionTypeDictionary = sequence.GetProductIons(m_ionTypeFactory.GetAllKnownIonTypes());

    // Held as double so the progress ratio below uses floating-point division.
    double fragmentCount = ionTypeDictionary.Count;
    var index = 0;

    foreach (var ionTypeKvp in ionTypeDictionary)
    {
        var ionTypeTuple = ionTypeKvp.Key;
        var ion = ionTypeKvp.Value;
        var fragmentMz = ion.GetMonoIsotopicMz();

        uimfPointList = UimfUtil.GetXic(fragmentMz, CurrentTolerance, UIMFData.FrameType.MS2, DataReader.ToleranceType.PPM);
        watershedPointList = WaterShedMapUtil.BuildWatershedMap(uimfPointList);
        smoother.Smooth(ref watershedPointList);

        var fragmentFeatureBlobList = FeatureDetection.DoWatershedAlgorithm(watershedPointList).ToList();
        FragmentFeaturesDictionary.Add(ionTypeTuple, fragmentFeatureBlobList);

        index++;
        var progress = (int)((index / fragmentCount) * 100);
        m_FeatureFinderBackgroundWorker.ReportProgress(progress);
    }

    OnPropertyChanged("FeatureList");
    OnPropertyChanged("LcSlicePlot");
    OnPropertyChanged("ImsSlicePlot");
}
/// <summary>
/// For charges 2 through 60 of a fixed protein, extracts the full precursor XIC of the m/z bin
/// containing the most abundant isotope and prints either all intensities or only the
/// intensity in the column that corresponds to scan 161.
/// </summary>
/// <remarks>
/// The test is ignored when <c>TestRawFilePath</c> does not exist and returns silently when the
/// sequence graph cannot be created.
/// </remarks>
public void TestGeneratingXicsOfAllCharges()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath, 0.0, 0.0);
    var comparer = new MzComparerWithBinning(27);

    const string protSequence = "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";

    var seqGraph = SequenceGraph.CreateGraph(new AminoAcidSet(), AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (seqGraph == null)
    {
        return;
    }

    seqGraph.SetSink(0);
    var neutral = seqGraph.GetSinkSequenceCompositionWithH2O() - Composition.Hydrogen;
    var isoEnv = Averagine.GetIsotopomerEnvelope(neutral.Mass);

    // Compile-time switch: true prints every scan, false prints only the scan-161 column.
    const bool showAllScans = false;
    var targetColIndex = 0;

#pragma warning disable 0162
    if (showAllScans)
    {
        Console.WriteLine("Charge\t" + string.Join("\t", run.GetScanNumbers(1)));
    }
    else
    {
        // Just display data for scan 161: its column index is the number of scans
        // that precede it in the scan number list.
        Console.WriteLine("Charge\t161");
        targetColIndex = run.GetScanNumbers(1).TakeWhile(scan => scan != 161).Count();
    }
#pragma warning restore 0162

    const int minCharge = 2;
    const int maxCharge = 60;

    for (var charge = minCharge; charge <= maxCharge; charge++)
    {
        var chargedIon = new Ion(neutral, charge);
        var targetMz = chargedIon.GetIsotopeMz(isoEnv.MostAbundantIsotopeIndex);

        // Extract the XIC over the whole m/z bin that contains the target m/z.
        var bin = comparer.GetBinNumber(targetMz);
        var xic = run.GetFullPrecursorIonExtractedIonChromatogram(comparer.GetMzStart(bin), comparer.GetMzEnd(bin));

        Console.Write(charge + "\t");

#pragma warning disable 0162
        if (showAllScans)
        {
            Console.WriteLine(string.Join("\t", xic.Select(p => p.Intensity)));
        }
        else
        {
            Console.WriteLine(xic[targetColIndex].Intensity);
        }
#pragma warning restore 0162
    }
}
/// <summary>
/// Scores a fixed protein annotation against MS2 scan 3633 (precursor charge 9) of a raw file
/// using <c>CorrMatchedPeakCounter</c>, after asserting that the precursor ion passes the MS1
/// filter, and prints the score and the elapsed time. The test is ignored when the raw file is
/// not present.
/// </summary>
public void TestCorrMatchedPeakCounter()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Scan and precursor under test.
    const int charge = 9;
    const int modIndex = 0;
    const int ms2ScanNum = 3633;
    const string protAnnotation = "_.TMNITSKQMEITPAIRQHVADRLAKLEKWQTHLINPHIILSKEPQGFIADATINTPNGHLVASAKHEDMYTAINELINKLERQLNKVQHKGEAR._";
    const string specFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SBEP_STM_001_02272012_Aragon.raw";

    // Parameters
    var precursorIonTolerance = new Tolerance(10);
    var productIonTolerance = new Tolerance(10);
    var stopwatch = new System.Diagnostics.Stopwatch();
    var aaSet = new AminoAcidSet();

    // Create a sequence graph
    var seqGraph = SequenceGraph.CreateGraph(aaSet, protAnnotation);
    Assert.NotNull(seqGraph, "Invalid sequence: {0}", protAnnotation);

    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);

    // Timing starts after the run has been loaded.
    stopwatch.Start();

    var precursorFilter = new Ms1ContainsIonFilter(run, precursorIonTolerance);
    var seqCompositionArr = seqGraph.GetSequenceCompositions();
    Console.WriteLine("Length: {0}\tNumCompositions: {1}", protAnnotation.Length - 4, seqCompositionArr.Length);

    var seqComposition = seqCompositionArr[modIndex];
    var peptideComposition = seqComposition + Composition.H2O;
    // Result intentionally discarded, as in the original call.
    peptideComposition.GetIsotopomerEnvelopeRelativeIntensities();
    Console.WriteLine("Composition: {0}, AveragineMass: {1}", seqComposition, seqComposition.Mass);

    seqGraph.SetSink(modIndex);

    var precursorIon = new Ion(peptideComposition, charge);
    Assert.True(precursorFilter.IsValid(precursorIon, ms2ScanNum));

    var spec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
    Assert.True(spec != null);

    var peakCounter = new CorrMatchedPeakCounter(spec, productIonTolerance, 1, 10);
    var score = seqGraph.GetFragmentScore(peakCounter);
    Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", protAnnotation, charge, precursorIon.GetMostAbundantIsotopeMz(), ms2ScanNum, score);

    stopwatch.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", stopwatch.Elapsed.TotalSeconds);
}
/// <summary>
/// Scores the composition at index 0 of a fixed protein annotation against MS2 scan 4448
/// (charge 6) using <c>MatchedPeakCounter</c>, printing the fragment score and the
/// elapsed time. The test is ignored when the raw file cannot be found.
/// </summary>
public void TestMatchedPeakCounter()
{
    const string protAnnotation = "_.MFQQEVTITAPNGLHTRPAAQFVKEAKGFTSEITVTSNGKSASAKSLFKLQTLGLTQGTVVTISAEGEDEQKAVEHLVKLMAELE._";
    const string specFilePath = @"\\protoapps\UserData\Jungkap\Joshua\testData\SBEP_STM_001_02272012_Aragon.raw";
    const int charge = 6;
    const int modIndex = 0;
    const int ms2ScanNum = 4448;

    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Parameters
    var precursorIonTolerance = new Tolerance(15);
    var productIonTolerance = new Tolerance(15);
    var aaSet = new AminoAcidSet();

    // Create a sequence graph
    var graph = SequenceGraph.CreateGraph(aaSet, protAnnotation);
    Assert.NotNull(graph, "Invalid sequence: {0}", protAnnotation);

    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);

    // Timing covers everything after the run has been loaded.
    var timer = System.Diagnostics.Stopwatch.StartNew();

    var precursorFilter = new Ms1ContainsIonFilter(run, precursorIonTolerance);
    var compositions = graph.GetSequenceCompositions();

    // The annotation carries a two-character flank ("_." / "._") on each side.
    Console.WriteLine("Length: {0}\tNumCompositions: {1}", protAnnotation.Length - 4, compositions.Length);

    var seqComposition = compositions[modIndex];
    var peptideComposition = seqComposition + Composition.H2O;
    peptideComposition.GetIsotopomerEnvelopeRelativeIntensities();
    Console.WriteLine("Composition: {0}, AveragineMass: {1}", seqComposition, seqComposition.Mass);

    graph.SetSink(modIndex);

    // The precursor must be observable in MS1 for the chosen scan.
    var precursorIon = new Ion(peptideComposition, charge);
    Assert.True(precursorFilter.IsValid(precursorIon, ms2ScanNum));

    var productSpectrum = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
    Assert.True(productSpectrum != null);

    var peakCounter = new MatchedPeakCounter(productSpectrum, productIonTolerance, 1, 10);
    var fragmentScore = graph.GetFragmentScore(peakCounter);

    Console.WriteLine(
        "{0}\t{1}\t{2}\t{3}\t{4}",
        protAnnotation,
        charge,
        precursorIon.GetMostAbundantIsotopeMz(),
        ms2ScanNum,
        fragmentScore);

    timer.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
}
} // true: target and decoy, false: target only, null: decoy only

/// <summary>
/// Quick identification pass: marks, per mass bin, which MS2 scans contain a deconvoluted
/// peak in that bin, then counts for every intact protein (with 0 and 1 N-terminal
/// cleavages) how many of its fragment-node masses (plus the Y-ion offset) hit a marked
/// bin in each scan. Prints the best-scoring protein per MS2 scan as a tab-separated table.
/// </summary>
public void QuickId()
{
    const string rawFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
    const string modFilePath = @"H:\Research\QCShew_TopDown\Production\Mods.txt";
    const int numBits = 29; // max error: 4ppm
    const int minCharge = 1;
    const int maxCharge = 20;
    const double corrThreshold = 0.7;
    const double minFragmentMass = 200.0;
    const double maxFragmentMass = 50000.0;

    var tolerance = new Tolerance(10);
    var comparer = new MzComparerWithBinning(numBits);
    var minFragMassBin = comparer.GetBinNumber(minFragmentMass);
    var maxFragMassBin = comparer.GetBinNumber(maxFragmentMass);

    var aminoAcidSet = new AminoAcidSet(modFilePath);

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
    var ms2ScanNumArr = run.GetScanNumbers(2).ToArray();

    var sw = new Stopwatch();
    sw.Start();

    Console.Write("Building Spectrum Arrays...");

    // One bit vector per mass bin, indexed by scan number.
    var massVectors = new BitArray[maxFragMassBin - minFragMassBin + 1];
    for (var i = minFragMassBin; i <= maxFragMassBin; i++)
    {
        massVectors[i - minFragMassBin] = new BitArray(run.MaxLcScan + 1);
    }

    foreach (var ms2ScanNum in ms2ScanNumArr)
    {
        var productSpec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
        if (productSpec == null)
        {
            continue;
        }

        var deconvolutedPeaks = Deconvoluter.GetDeconvolutedPeaks(productSpec.Peaks, minCharge, maxCharge, 2, 1.1, tolerance, corrThreshold);
        if (deconvolutedPeaks == null)
        {
            continue;
        }

        foreach (var p in deconvolutedPeaks)
        {
            var mass = p.Mass;
            var deltaMass = tolerance.GetToleranceAsDa(mass, 1);

            // Clamp the tolerance window to the tracked bin range up front so the
            // loop only visits bins that actually have a vector.
            var firstBin = Math.Max(comparer.GetBinNumber(mass - deltaMass), minFragMassBin);
            var lastBin = Math.Min(comparer.GetBinNumber(mass + deltaMass), maxFragMassBin);
            for (var binNum = firstBin; binNum <= lastBin; binNum++)
            {
                massVectors[binNum - minFragMassBin][ms2ScanNum] = true;
            }
        }
    }

    sw.Stop();
    Console.WriteLine(@"{0:f1} sec.", sw.Elapsed.TotalSeconds);

    sw.Restart();

    var fastaDb = new FastaDatabase(fastaFilePath);
    fastaDb.Read();
    var indexedDb = new IndexedDatabase(fastaDb);
    var numProteins = 0;
    var intactProteinAnnotationAndOffsets = indexedDb.IntactSequenceAnnotationsAndOffsets(0, int.MaxValue);

    var bestProtein = new string[run.MaxLcScan + 1];
    var bestScore = new int[run.MaxLcScan + 1];

    // Loop-invariant: the same Y-ion offset is added to every fragment composition mass.
    var yOffsetMass = BaseIonType.Y.OffsetComposition.Mass;

    foreach (var annotationAndOffset in intactProteinAnnotationAndOffsets)
    {
        if (++numProteins % 10 == 0)
        {
            Console.WriteLine(@"Processing, {0} proteins done, {1:f1} sec elapsed", numProteins, sw.Elapsed.TotalSeconds);
        }

        var annotation = annotationAndOffset.Annotation;
        var offset = annotationAndOffset.Offset;

        // Strip the two-character flank on each side of the annotation.
        var protSequence = annotation.Substring(2, annotation.Length - 4);

        // suffix
        var seqGraph = SequenceGraph.CreateGraph(aminoAcidSet, AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
        if (seqGraph == null)
        {
            continue;
        }

        for (var numNTermCleavage = 0; numNTermCleavage <= 1; numNTermCleavage++)
        {
            if (numNTermCleavage > 0)
            {
                seqGraph.CleaveNTerm();
            }

            var allCompositions = seqGraph.GetAllFragmentNodeCompositions();

            // Per-scan count of fragment masses that fall in a marked bin.
            var scoreArr = new int[run.MaxLcScan + 1];
            foreach (var fragComp in allCompositions)
            {
                var suffixMass = fragComp.Mass + yOffsetMass;
                var binNum = comparer.GetBinNumber(suffixMass);
                if (binNum < minFragMassBin || binNum > maxFragMassBin)
                {
                    continue;
                }

                var vector = massVectors[binNum - minFragMassBin];
                foreach (var ms2ScanNum in ms2ScanNumArr)
                {
                    if (vector[ms2ScanNum])
                    {
                        ++scoreArr[ms2ScanNum];
                    }
                }
            }

            foreach (var ms2ScanNum in ms2ScanNumArr)
            {
                if (scoreArr[ms2ScanNum] > bestScore[ms2ScanNum])
                {
                    bestScore[ms2ScanNum] = scoreArr[ms2ScanNum];
                    var proteinName = fastaDb.GetProteinName(offset);

                    // A trailing apostrophe marks the form with one N-terminal cleavage.
                    bestProtein[ms2ScanNum] = proteinName + (numNTermCleavage == 1 ? "'" : "");
                }
            }
        }

        // prefix
    }

    // Rows are written in the same column order as the header
    // (the original emitted the score before the protein name).
    Console.WriteLine("ScanNum\tBestProtein\tScore");
    foreach (var ms2ScanNum in ms2ScanNumArr)
    {
        Console.WriteLine("{0}\t{1}\t{2}", ms2ScanNum, bestProtein[ms2ScanNum] ?? "", bestScore[ms2ScanNum]);
    }
}
/// <summary>
/// Builds a sequence graph for a fixed annotation with an "AddVal" modification allowed on
/// every standard residue plus fixed protein N-terminal acetylation (at most one
/// modification per protein), then prints the fast fragment score and the refined
/// <c>InformedTopDownScorer</c> scores for every proteoform against MS2 scan 19011 at
/// charge 7. The test is ignored when the raw file cannot be found.
/// </summary>
public void TestPrSm()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    //const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownYufeng\raw\yufeng_column_test2.raw";
    //const string annotation =
    //    "_.MKTKLSVLSAAMLAATLTMMPAVSQAAIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVG" +
    //    "LHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTV" +
    //    "TSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVG" +
    //    "IGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGS" +
    //    "AAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEA" +
    //    "NQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDL" +
    //    "KSLKELLKDQEGAVALKIVRGKSMLYLVLR._";
    //var aaSet = new AminoAcidSet();
    //const int charge = 60;
    //const int ms2ScanNum = 46661;

    const string specFilePath = @"D:\Research\Data\Jon\AH_SF_mouseliver_3-1_Intact_2_6Feb14_Bane_PL011402.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    const int ms2ScanNum = 19011;
    const int charge = 7;
    const string annotation = "_.SKVSFKITLTSDPRLPYKVLSVPESTPFTAVLKFAAEEFKVPAATSAIITNDGIGINPAQTAGNVFLKHGSELRIIPRDRVGSC._";

    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, true);

    // Register a custom modification and allow it on every standard residue.
    var modVal = Modification.RegisterAndGetModification("AddVal", new Composition(5, 9, 1, 1, 0));
    var searchMods = AminoAcid.StandardAminoAcidCharacters.Select(residue => new SearchModification(modVal, residue, SequenceLocation.Everywhere, false)).ToList();
    searchMods.Add(acetylN);

    const int numMaxModsPerProtein = 1;
    var aaSet = new AminoAcidSet(searchMods, numMaxModsPerProtein);

    var graph = SequenceGraph.CreateGraph(aaSet, annotation);

    // Fail with a clear message instead of a NullReferenceException when the
    // annotation cannot be turned into a graph.
    Assert.NotNull(graph, "Invalid sequence: {0}", annotation);

    var numProteoforms = graph.GetNumProteoformCompositions();
    Console.WriteLine("NumProteoforms: " + numProteoforms);

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);
    var ms2Scorer = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 15);
    ms2Scorer.GetScorer(ms2ScanNum);
    var scorer = ms2Scorer.GetMs2Scorer(ms2ScanNum);
    Assert.NotNull(scorer, "Scorer is null!");

    // Loop-invariant inputs, looked up once.
    var modCombs = graph.GetModificationCombinations();
    var sequence = SimpleStringProcessing.GetStringBetweenDots(annotation);

    for (var i = 0; i < numProteoforms; i++)
    {
        graph.SetSink(i);
        Console.WriteLine("ModComb: " + modCombs[i]);

        var score = graph.GetFragmentScore(scorer);
        Console.WriteLine("Fast search score: " + score);

        var composition = graph.GetSinkSequenceCompositionWithH2O();

        // A fresh scorer is built per proteoform, as before.
        // NOTE(review): it may be hoistable if it keeps no per-composition state — confirm.
        var informedScorer = new InformedTopDownScorer(run, aaSet, 1, 30, new Tolerance(10));
        var refinedScore = informedScorer.GetScores(AminoAcid.ProteinNTerm, sequence, AminoAcid.ProteinCTerm, composition, charge, ms2ScanNum);

        Console.WriteLine("Modifications: {0}", refinedScore.Modifications);
        Console.WriteLine("Composition: {0}", composition);
        Console.WriteLine("RefinedScores: {0}", refinedScore);
    }
}
/// <summary>
/// Searches one fixed protein sequence, with up to four dynamic modifications, against the
/// run at <c>TestRawFilePath</c>. For every proteoform and every MS2 scan returned by a
/// <c>SimpleMs1Filter</c> for its mass, prints one tab-separated line when the fragment
/// score exceeds 3. The test is ignored when the raw file cannot be found.
/// </summary>
public void Test43KProtein()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    // Configure amino acid set
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var dethiomethylM = new SearchModification(Modification.Dethiomethyl, 'M', SequenceLocation.Everywhere, false);
    var deamidatedN = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    var deamidatedQ = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);
    var pyroCarbamidomethylC = new SearchModification(Modification.PyroCarbamidomethyl, 'C', SequenceLocation.ProteinNTerm, false);
    var phosphoS = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
    var phosphoT = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
    var phosphoY = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);
    var nitrosylC = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
    var nethylmaleimideC = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;

    // NOTE(review): glutathioneC is listed twice below — confirm whether that is intentional.
    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        oxM,
        dethiomethylM,
        acetylN,
        //phosphoS,
        //phosphoT,
        //phosphoY,
        deamidatedN,
        // deamidatedQ,
        glutathioneC,
        pyroCarbamidomethylC,
        nitrosylC,
        nethylmaleimideC
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    // var aaSet = new AminoAcidSet();

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath);

    const string protSequence = "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";
    const string annotation = "_." + protSequence + "._";

    var graph = SequenceGraph.CreateGraph(aaSet, AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (graph == null)
    {
        return;
    }

    var ms1Filter = new SimpleMs1Filter();
    var scorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(run);

    // Request a scorer for every MS2 scan of interest before the search starts.
    foreach (var scanNum in Ms2ScanNums)
    {
        scorerFactory.GetScorer(scanNum);
    }

    // Only the uncleaved form is examined (the upper bound is 0).
    for (var numNTermCleavages = 0; numNTermCleavages <= 0; numNTermCleavages++)
    {
        if (numNTermCleavages > 0)
        {
            graph.CleaveNTerm();
        }

        var proteoformCount = graph.GetNumProteoformCompositions();
        var allModCombinations = graph.GetModificationCombinations();

        for (var modIndex = 0; modIndex < proteoformCount; modIndex++)
        {
            graph.SetSink(modIndex);
            var compositionWithH2O = graph.GetSinkSequenceCompositionWithH2O();
            var sequenceMass = compositionWithH2O.Mass;
            var modCombination = allModCombinations[modIndex];

            foreach (var scanNum in ms1Filter.GetMatchingMs2ScanNums(sequenceMass))
            {
                var productSpectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
                if (productSpectrum == null)
                {
                    continue;
                }

                // Estimate the charge from the sequence mass and the isolation window target m/z.
                var targetMz = productSpectrum.IsolationWindow.IsolationWindowTargetMz;
                var charge = (int)Math.Round(sequenceMass / (targetMz - Constants.Proton));

                var scorer = scorerFactory.GetMs2Scorer(scanNum);
                var score = graph.GetFragmentScore(scorer);
                if (score <= 3)
                {
                    continue;
                }

                var precursorIon = new Ion(compositionWithH2O, charge);
                var sequence = protSequence.Substring(numNTermCleavages);
                var pre = numNTermCleavages == 0 ? annotation[0] : annotation[numNTermCleavages + 1];
                var post = annotation[annotation.Length - 1];

                Console.WriteLine(
                    "{0}.{1}.{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}",
                    pre,
                    sequence,
                    post,
                    scanNum,
                    modCombination,
                    precursorIon.GetMostAbundantIsotopeMz(),
                    precursorIon.Charge,
                    precursorIon.Composition.Mass,
                    score);
            }
        }
    }
}
/// <summary>
/// Scores a sequence against an MS2 scan. Among the sequence-graph proteoforms whose
/// composition (with H2O) equals <paramref name="composition"/>, the one with the highest
/// fragment score is chosen; its score is then adjusted by a cleavage credit or penalty at
/// each terminus.
/// </summary>
/// <param name="pre">Character preceding the sequence; only used for the N-terminal credit/penalty.</param>
/// <param name="sequence">Sequence to score; its last residue is used for the C-terminal credit/penalty.</param>
/// <param name="post">Character following the sequence; only used for the C-terminal credit/penalty.</param>
/// <param name="nTerm">N-terminus passed to the sequence graph.</param>
/// <param name="cTerm">C-terminus passed to the sequence graph.</param>
/// <param name="composition">Composition a proteoform must equal to be scored; its mass is also used when a scored spectrum is first built.</param>
/// <param name="charge">Charge; part of the scored-spectrum cache key.</param>
/// <param name="ms2ScanNum">MS2 scan number; part of the scored-spectrum cache key.</param>
/// <returns>
/// The adjusted score with its modification string, or <c>null</c> when the scan is not a
/// product spectrum, the sequence graph cannot be created, or no proteoform yields a best score.
/// </returns>
public IcBottomUpScores GetScores(char pre, string sequence, char post, AminoAcid nTerm, AminoAcid cTerm, Composition composition, int charge, int ms2ScanNum)
{
    // Look up the scored spectrum cached for this charge/scan pair; build and cache it on a miss.
    var cacheKey = GetChargetScanNumPairIndex(charge, ms2ScanNum);
    ScoredSpectrum cachedSpectrum;
    if (!_scoredSpectra.TryGetValue(cacheKey, out cachedSpectrum))
    {
        var productSpectrum = Run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
        if (productSpectrum == null)
        {
            return null;
        }

        cachedSpectrum = new ScoredSpectrum(productSpectrum, _rankScorer, charge, composition.Mass, Tolerance);
        _scoredSpectra.Add(cacheKey, cachedSpectrum);
    }

    var graph = SequenceGraph.CreateGraph(AminoAcidSet, nTerm, sequence, cTerm);
    if (graph == null)
    {
        return null;
    }

    // Keep the best-scoring proteoform among those matching the requested composition.
    Tuple<double, string> best = null;
    var bestScore = double.NegativeInfinity;
    var numCompositions = graph.GetSequenceCompositions().Length;
    for (var modIndex = 0; modIndex < numCompositions; modIndex++)
    {
        graph.SetSink(modIndex);
        var sinkComposition = graph.GetSinkSequenceCompositionWithH2O();
        if (sinkComposition.Equals(composition))
        {
            var candidate = graph.GetFragmentScoreAndModifications(cachedSpectrum);
            if (candidate.Item1 > bestScore)
            {
                best = candidate;
                bestScore = candidate.Item1;
            }
        }
    }

    if (best == null)
    {
        return null;
    }

    var ms2Score = best.Item1;

    // TODO: This assumes enzyme is trypsin
    const double probN = 0.99999;
    const double probC = 0.99999;
    const double sumAAProbabilities = 0.1;
    var creditN = Math.Log(probN / sumAAProbabilities);
    var penaltyN = Math.Log((1.0 - probN) / (1.0 - sumAAProbabilities));
    var creditC = Math.Log(probC / sumAAProbabilities);
    var penaltyC = Math.Log((1.0 - probC) / (1.0 - sumAAProbabilities));

    // N-terminus: credit when the preceding character is K, R, the database delimiter or '-'.
    var nTermCredited = pre == 'K' || pre == 'R' || pre == FastaDatabaseConstants.Delimiter || pre == '-';
    ms2Score += nTermCredited ? creditN : penaltyN;

    // C-terminus: credit when the sequence ends in K or R, or is followed by the delimiter or '-'.
    var lastResidue = sequence[sequence.Length - 1];
    var cTermCredited = lastResidue == 'K' || lastResidue == 'R' || post == FastaDatabaseConstants.Delimiter || post == '-';
    ms2Score += cTermCredited ? creditC : penaltyC;

    return new IcBottomUpScores(ms2Score, best.Item2);
}
/// <summary>
/// Runs <c>TopDownScorer</c> on every sequence composition of every intact protein
/// (length 7 to 1000, with 0 and 1 N-terminal cleavages) read from a FASTA database,
/// printing each score followed by protein/composition counts and the elapsed time.
/// </summary>
public void TestTopDownScoringForAllXics()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    // Search parameters
    const int numNTermCleavages = 1;  // 30
    const int minLength = 7;
    const int maxLength = 1000;
    //const int minCharge = 5; // 3
    //const int maxCharge = 15; // 67
    const int numMaxModsPerProtein = 0; // 6
    var precursorTolerance = new Tolerance(10);

    const string dbFilePath = @"..\..\..\TestFiles\sprot.Ecoli.2012_07.fasta";
    //const string dbFilePath = @"..\..\..\TestFiles\sprot.Ecoli.2012_07.icdecoy.KR.fasta";
    //const string dbFilePath = @"..\..\..\TestFiles\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";
    // const string dbFilePath =
    //    @"C:\cygwin\home\kims336\Data\TopDown\ID_003558_56D73071.fasta";

    const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";

    // Time the raw file load on its own.
    var timer = System.Diagnostics.Stopwatch.StartNew();
    Console.Write("Reading raw file...");
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);
    timer.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);

    // Configure amino acid set
    // var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);
    // NOTE(review): "dehydro" is built from Modification.PyroGluQ on 'C' — confirm this is intended.
    var dehydro = new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false);
    var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    // var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        //pyroGluQ,
        dehydro,
        cysteinylC,
        glutathioneC,
        //oxM
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    var targetDb = new FastaDatabase(dbFilePath);
    // targetDb.CreateDecoyDatabase(Enzyme.Trypsin);
    // System.Environment.Exit(1);
    var indexedDb = new IndexedDatabase(targetDb);

    var proteinCount = 0;
    long compositionCount = 0;
    //long numXics = 0;

    TopDownScorer.MaxCharge = 25;
    TopDownScorer.MinCharge = 8;

    timer.Restart();
    Console.WriteLine("Generating XICs...");

    foreach (var protAnnotationAndOffset in indexedDb.IntactSequenceAnnotationsAndOffsets(minLength, maxLength))
    {
        ++proteinCount;
        //if (numProteins > 2000) break;

        if (proteinCount % 1000 == 0)
        {
            Console.WriteLine("Processed {0} proteins", proteinCount);
        }

        var graph = SequenceGraph.CreateGraph(aaSet, protAnnotationAndOffset.Annotation);
        if (graph == null)
        {
            continue;
        }

        for (var cleavages = 0; cleavages <= numNTermCleavages; cleavages++)
        {
            if (cleavages > 0)
            {
                graph.CleaveNTerm();
            }

            foreach (var protComposition in graph.GetSequenceCompositions())
            {
                compositionCount++;
                var topDownScorer = new TopDownScorer(protComposition, run, precursorTolerance);
                Console.WriteLine(topDownScorer.GetScore());
            }
        }
    }

    timer.Stop();
    Console.WriteLine("NumProteins: {0}", proteinCount);
    Console.WriteLine("NumProteinCompositions: {0}", compositionCount);
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Scores one intact protein annotation, with 0 to <paramref name="maxNumNTermCleavages"/>
/// N-terminal cleavages, against candidate MS2 scans and records each resulting match via
/// <c>AddMatch</c>.
/// </summary>
/// <param name="annotationAndOffset">Protein annotation (two flanking characters on each side) and its database offset.</param>
/// <param name="sequenceFilter">Supplies candidate MS2 scan numbers for a sequence mass when <c>ScanNumbers</c> is null.</param>
/// <param name="matches">Per-scan match collections handed to <c>AddMatch</c>.</param>
/// <param name="maxNumNTermCleavages">Largest number of N-terminal cleavages to try.</param>
/// <param name="isDecoy">Flag stored on every created match.</param>
/// <param name="cancellationToken">Optional token forwarded to the parallel loop.</param>
private void SearchForMatches(AnnotationAndOffset annotationAndOffset, ISequenceFilter sequenceFilter, SortedSet<DatabaseSequenceSpectrumMatch>[] matches, int maxNumNTermCleavages, bool isDecoy, CancellationToken? cancellationToken = null)
{
    var parallelOptions = new ParallelOptions
    {
        MaxDegreeOfParallelism = MaxNumThreads,
        CancellationToken = cancellationToken ?? CancellationToken.None
    };

    var annotation = annotationAndOffset.Annotation;
    var offset = annotationAndOffset.Offset;
    //var protein = db.GetProteinName(offset);

    // Strip the two-character flank on each side of the annotation.
    var protSequence = annotation.Substring(2, annotation.Length - 4);

    var graph = SequenceGraph.CreateGraph(AminoAcidSet, AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (graph == null)
    {
        // No matches will be found without a sequence graph.
        return;
    }

    for (var numNTermCleavages = 0; numNTermCleavages <= maxNumNTermCleavages; numNTermCleavages++)
    {
        if (numNTermCleavages > 0)
        {
            graph.CleaveNTerm();
        }

        var proteoformCount = graph.GetNumProteoformCompositions();
        var allModCombinations = graph.GetModificationCombinations();

        for (var modIndex = 0; modIndex < proteoformCount; modIndex++)
        {
            graph.SetSink(modIndex);
            var compositionWithH2O = graph.GetSinkSequenceCompositionWithH2O();
            var sequenceMass = compositionWithH2O.Mass;

            var massInRange = sequenceMass >= MinSequenceMass && sequenceMass <= MaxSequenceMass;
            if (!massInRange)
            {
                continue;
            }

            var modCombination = allModCombinations[modIndex];

            // An explicit scan list takes precedence over the mass-based filter.
            var candidateScans = this.ScanNumbers ?? sequenceFilter.GetMatchingMs2ScanNums(sequenceMass);

            // NOTE(review): the shared graph is scored from several threads here — presumably
            // GetFragmentScore is safe for concurrent use; confirm.
            Parallel.ForEach(candidateScans, parallelOptions, scanNum =>
            {
                // Skip scans outside the range spanned by the known MS2 scan numbers.
                var scanInRange = scanNum <= _ms2ScanNums.Last() && scanNum >= _ms2ScanNums.First();
                if (!scanInRange)
                {
                    return;
                }

                var scorer = _ms2ScorerFactory2.GetMs2Scorer(scanNum);
                var score = graph.GetFragmentScore(scorer);

                var isoTargetMz = _isolationWindowTargetMz[scanNum];
                if (!(isoTargetMz > 0))
                {
                    return;
                }

                // Estimate the charge from the sequence mass and the isolation window target m/z.
                var charge = (int)Math.Round(sequenceMass / (isoTargetMz - Constants.Proton));
                var precursorIon = new Ion(compositionWithH2O, charge);

                var sequence = protSequence.Substring(numNTermCleavages);
                var pre = numNTermCleavages == 0 ? annotation[0] : annotation[numNTermCleavages + 1];
                var post = annotation[annotation.Length - 1];

                var prsm = new DatabaseSequenceSpectrumMatch(sequence, pre, post, scanNum, offset, numNTermCleavages, modCombination, precursorIon, score, isDecoy);
                AddMatch(matches, scanNum, prsm);
            });
        }
    }
}
/// <summary>
/// For each sequence composition of a fixed protein annotation, prints the composition,
/// its most abundant isotope index, the isotope m/z at charge 11, the
/// <c>TopDownScorer</c> score and a charge-versus-m/z table for charges 9 through 18.
/// </summary>
public void TestTopDownScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    TopDownScorer.MaxCharge = 25;
    TopDownScorer.MinCharge = 8;

    const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";
    const string protAnnotation = "A.AHAHLTHQYPAANAQVTAAPQAITLNFSEGVETGFSGAKITGPKNENIKTLPAKRNEQDQKQLIVPLADSLKPGTYTVDWHVVSVDGHKTKGHYTFSVK.";

    // NOTE(review): "dehydro" is built from Modification.PyroGluQ on 'C' — confirm this is intended.
    var dehydro = new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false);
    var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        //pyroGluQ,
        dehydro,
        cysteinylC,
        glutathioneC,
        //oxM
    };

    //var aaSet = new AminoAcidSet(Modification.Carbamidomethylation);
    var aaSet = new AminoAcidSet(searchModifications, 0);
    var precursorTolerance = new Tolerance(10);

    // Create a sequence graph
    var graph = SequenceGraph.CreateGraph(aaSet, protAnnotation);

    // TopDownScorer.MaxCharge = 60;
    // TopDownScorer.MinCharge = 3;

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

    foreach (var protComposition in graph.GetSequenceCompositions())
    {
        var isotopeIndex = protComposition.GetMostAbundantIsotopeZeroBasedIndex();

        Console.WriteLine("Composition\t{0}", protComposition);
        Console.WriteLine("MostAbundantIsotopeIndex\t{0}", isotopeIndex);

        var ionAtCharge11 = new Ion(protComposition + Composition.H2O, 11);
        Console.WriteLine(ionAtCharge11.GetIsotopeMz(isotopeIndex));
        Console.WriteLine();

        var topDownScorer = new TopDownScorer(protComposition, run, precursorTolerance);
        Console.WriteLine(topDownScorer.GetScore());

        // m/z of the most abundant isotope for a range of charge states.
        Console.WriteLine("\nCharge\tm/z");
        for (var charge = 9; charge <= 18; charge++)
        {
            var precursorIon = new Ion(protComposition + Composition.H2O, charge);
            Console.WriteLine("{0}\t{1}", charge, precursorIon.GetIsotopeMz(isotopeIndex));
        }
    }
}
/// <summary>
/// Reads a tab-separated MSAlign+ result table and writes a copy, to the same path with
/// ".txt" appended, that has one extra column: the <c>TopDownScorer</c> score of the first
/// sequence composition of the annotation in column 13 (zero-based). The header row gets
/// "Score"; rows whose annotation is missing or cannot be turned into a sequence graph
/// get "N/A".
/// </summary>
public void TestMsAlignPlusResults()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    TopDownScorer.MaxCharge = 25;
    TopDownScorer.MinCharge = 8;

    const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";
    const string msAlignPlusResultPath = @"C:\workspace\TopDown\E_coli_iscU_60_mock_MSAlign_ResultTable_sam.txt";

    // Zero-based index of the annotation column in the result table.
    const int annotationColumn = 13;

    // NOTE(review): "dehydro" is built from Modification.PyroGluQ on 'C' — confirm this is intended.
    var dehydro = new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false);
    var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        //pyroGluQ,
        dehydro,
        cysteinylC,
        glutathioneC,
        //oxM
    };
    var aaSet = new AminoAcidSet(searchModifications, 0);

    var precursorTolerance = new Tolerance(10);

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

    // The reader is opened first so a missing input does not leave an empty output file,
    // and both streams are disposed even when a row throws.
    using (var reader = new StreamReader(msAlignPlusResultPath))
    using (var writer = new StreamWriter(msAlignPlusResultPath + ".txt"))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Header row: just append the new column name.
            if (line.StartsWith("Data_file_name\t", StringComparison.Ordinal))
            {
                writer.WriteLine(line + "\tScore");
                continue;
            }

            var tokens = line.Split('\t');
            if (tokens.Length <= annotationColumn)
            {
                // Blank or truncated row: there is no annotation to score.
                writer.WriteLine(line + "\tN/A");
                continue;
            }

            var annotation = tokens[annotationColumn];
            var seqGraph = SequenceGraph.CreateGraph(aaSet, annotation);
            if (seqGraph == null)
            {
                writer.WriteLine(line + "\tN/A");
                continue;
            }

            var protCompositions = seqGraph.GetSequenceCompositions();
            var scorer = new TopDownScorer(protCompositions[0], run, precursorTolerance);
            var score = scorer.GetScore();

            writer.WriteLine(line + "\t" + score);
            Console.WriteLine(score);
        }
    }
}
/// <summary>
/// Scores every annotation against the MS2 scans selected by <paramref name="ms1Filter"/>
/// and keeps, per scan, up to <c>NumMatchesPerSpectrum</c> matches whose fragment score
/// exceeds 2.
/// </summary>
/// <param name="annotationsAndOffsets">Annotations (two flanking characters on each side) with their database offsets.</param>
/// <param name="ms1Filter">Supplies the MS2 scan numbers to try for a given sequence mass.</param>
/// <param name="isDecoy">Flag stored on every created match.</param>
/// <returns>An array indexed by scan number; entries are null for scans without any match.</returns>
private SortedSet<DatabaseSequenceSpectrumMatch>[] RunSearch(IEnumerable<AnnotationAndOffset> annotationsAndOffsets, ISequenceFilter ms1Filter, bool isDecoy)
{
    var sw = new Stopwatch();
    var numPeptides = 0;
    sw.Start();

    var matches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];

    // TODO: N-term Met cleavage
    foreach (var annotationAndOffset in annotationsAndOffsets)
    {
        ++numPeptides;

        var annotation = annotationAndOffset.Annotation;
        var offset = annotationAndOffset.Offset;

        if (numPeptides % 100000 == 0)
        {
            // numPeptides is a positive multiple of 100000 here, so the ordinal suffix is
            // always "th"; the interval timer restarts after each report.
            Console.Write(@"Processing {0}th peptides...", numPeptides);
            sw.Stop();
            var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);
            sw.Restart();
        }

        var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, annotation);
        if (seqGraph == null)
        {
            // Console.WriteLine("Ignoring illegal protein: {0}", annotation);
            continue;
        }

        var numProteoforms = seqGraph.GetNumProteoformCompositions();
        var modCombs = seqGraph.GetModificationCombinations();
        for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
        {
            seqGraph.SetSink(modIndex);
            var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
            var sequenceMass = protCompositionWithH2O.Mass;
            var modCombinations = modCombs[modIndex];

            foreach (var ms2ScanNum in ms1Filter.GetMatchingMs2ScanNums(sequenceMass))
            {
                var spec = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                if (spec == null)
                {
                    continue;
                }

                // Estimate the charge from the sequence mass and the isolation window target m/z.
                var charge = (int)Math.Round(sequenceMass / (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));
                var scorer = _ms2ScorerFactory.GetMs2Scorer(ms2ScanNum);
                var score = seqGraph.GetFragmentScore(scorer);
                if (score <= 2)
                {
                    continue;
                }

                var precursorIon = new Ion(protCompositionWithH2O, charge);
                var sequence = annotation.Substring(2, annotation.Length - 4);
                var pre = annotation[0];
                var post = annotation[annotation.Length - 1];
                var prsm = new DatabaseSequenceSpectrumMatch(sequence, pre, post, ms2ScanNum, offset, 0, modCombinations, precursorIon, score, isDecoy);

                var existingMatches = matches[ms2ScanNum];
                if (existingMatches == null)
                {
                    matches[ms2ScanNum] = new SortedSet<DatabaseSequenceSpectrumMatch> { prsm };
                }
                else if (existingMatches.Count < NumMatchesPerSpectrum)
                {
                    existingMatches.Add(prsm);
                }
                else if (score > existingMatches.Min.Score && existingMatches.Add(prsm))
                {
                    // Evict the current minimum only when the new match was actually inserted.
                    // Add returns false for an element the set already considers present, and
                    // removing unconditionally would then shrink the set.
                    existingMatches.Remove(existingMatches.Min);
                }
            }
        }
    }

    return matches;
}