/// <summary>
/// Initializes a new instance of the <see cref="PrecursorSequenceIonViewModel"/> class.
/// Sets placeholder defaults (a fragmentation sequence built from an empty amino-acid list,
/// no heavy modifications, isotope view, no labeled ions) and then registers the reactive
/// subscriptions that keep the view-mode properties consistent with each other.
/// </summary>
public PrecursorSequenceIonViewModel()
{
    // Defaults. These are assigned before any subscription exists.
    var emptySequence = new Sequence(new List<AminoAcid>());
    FragmentationSequence = new FragmentationSequence(emptySequence, 1, null, ActivationMethod.HCD);
    HeavyModifications = new SearchModification[0];
    PrecursorViewMode = PrecursorViewMode.Isotopes;
    LabeledIonViewModels = new LabeledIonViewModel[0];

    // NOTE: the subscriptions below are registered in a fixed order on purpose;
    // each one may run as soon as it is subscribed, so do not reorder them.

    // ChargeViewMode and IsotopeViewMode are always each other's negation.
    this.WhenAnyValue(vm => vm.ChargeViewMode)
        .Subscribe(showCharges => IsotopeViewMode = !showCharges);
    this.WhenAnyValue(vm => vm.IsotopeViewMode)
        .Subscribe(showIsotopes => ChargeViewMode = !showIsotopes);

    // The boolean isotope flag drives the enum-valued PrecursorViewMode...
    this.WhenAnyValue(vm => vm.IsotopeViewMode)
        .Subscribe(showIsotopes =>
        {
            if (showIsotopes)
            {
                PrecursorViewMode = PrecursorViewMode.Isotopes;
            }
            else
            {
                PrecursorViewMode = PrecursorViewMode.Charges;
            }
        });

    // ...and the enum drives both boolean flags.
    this.WhenAnyValue(vm => vm.PrecursorViewMode)
        .Subscribe(mode =>
        {
            IsotopeViewMode = mode == PrecursorViewMode.Isotopes;
            ChargeViewMode = mode == PrecursorViewMode.Charges;
        });

    // Rebuild the labeled ion view models whenever any of their inputs changes.
    this.WhenAnyValue(
            vm => vm.PrecursorViewMode,
            vm => vm.RelativeIntensityThreshold,
            vm => vm.HeavyModifications,
            vm => vm.FragmentationSequence)
        .SelectMany(async _ => await GetLabeledIonViewModels())
        .Subscribe(labeledIons => LabeledIonViewModels = labeledIons);
}
/// <summary>
/// Builds an <c>AminoAcidSet</c> from three variable modifications (protein N-term
/// acetylation, peptide N-term pyro-Glu on Q, oxidation of M) and calls its Display method.
/// </summary>
public void TestCreatingAminoAcidSet()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    // Configure amino acid set; the list order is acetylation, pyro-Glu, oxidation.
    const int maxModsPerProtein = 2;
    var modifications = new List<SearchModification>
    {
        new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false),
        new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.PeptideNTerm, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false)
    };

    var aminoAcidSet = new AminoAcidSet(modifications, maxModsPerProtein);
    //var aminoAcidSet = new AminoAcidSet(Modification.Carbamidomethylation);
    aminoAcidSet.Display();
}
/// <summary>
/// Creates a sequence graph for a short annotation with methyl-K and oxidized-M as
/// variable modifications, then prints every sequence composition with its mass.
/// </summary>
public void TestBuildingReverseGraph()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    const string annotation = "_.MARTKQTARK._";
    const int maxModsPerProtein = 2;

    // Configure amino acid set
    var modifications = new List<SearchModification>
    {
        new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false),
        //new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false)
    };
    var aminoAcidSet = new AminoAcidSet(modifications, maxModsPerProtein);

    var graph = SequenceGraph.CreateGraph(aminoAcidSet, annotation);
    foreach (var sequenceComposition in graph.GetSequenceCompositions())
    {
        Console.WriteLine("{0}\t{1}", sequenceComposition, sequenceComposition.Mass);
    }
}
/// <summary>
/// Builds a sequence graph for a long protein annotation and, for every sequence
/// composition, prints the first five and last five fragment compositions
/// (with a single " ..." line standing in for the ones in between).
/// </summary>
public void TestBuildingSequenceGraphLongProtein()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    // Configure amino acid set
    const int numMaxModsPerProtein = 6;
    var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);
    // Fixed: this local was previously built from Modification.PyroGluQ (copy/paste slip),
    // so enabling "dehydro" in the list below would have searched the wrong modification.
    var dehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

    // Only pyroGluQ is active; the other locals are kept as ready-to-enable toggles.
    var searchModifications = new List<SearchModification>
    {
        pyroGluQ,
        //dehydro,
        //cysteinylC,
        //glutathioneC,
        //oxM
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    //const string protAnnotation = "A.HAHLTHQYPAANAQVTAAPQAITLNFSEGVETGFSGAKITGPKNENIKTLPAKRNEQDQKQLIVPLADSLKPGTYTVDWHVVSVDGHKTKGHYTFSVK.-";
    //const string protAnnotation = "_.QQ._";
    const string protAnnotation =
        "_.MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAALRKLMMNHQ._";

    var seqGraph = SequenceGraph.CreateGraph(aaSet, protAnnotation);
    var seqCompositions = seqGraph.GetSequenceCompositions();

    for (var modIndex = 0; modIndex < seqCompositions.Length; modIndex++)
    {
        var seqComposition = seqCompositions[modIndex];
        Console.WriteLine("SequenceComposition: {0}", seqComposition);

        var compositions = seqGraph.GetFragmentCompositions(modIndex, 0).ToList();
        var compIndex = 0;
        foreach (var composition in compositions)
        {
            // Print only the head and tail of the list to keep the output readable.
            if (compIndex < 5 || compIndex >= compositions.Count - 5)
            {
                Console.WriteLine(" Seq: {0}, Frag: {1}", seqComposition, composition);
            }
            else if (compIndex == 5)
            {
                Console.WriteLine(" ...");
            }

            compIndex++;
        }
    }
}
/// <summary>
/// Runs <c>TestTopDownSearch</c> against the "Jia" data set stored at hard-coded local paths.
/// The test is ignored when the spectrum file or the FASTA database is not present.
/// </summary>
public void TestForJiaData()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // QC_Shew
    //const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
    //const string dbFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_002216_235ACCEA.fasta";
    //const string dbFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\database\Test.fasta";

    // Jia's data
    const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1.raw";
    const string dbFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\database\ID_003962_71E1A1D4.fasta";
    const string outputDir = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\D1_1_Mode1";

    // Skip (rather than fail) when an input file is unavailable on this machine.
    foreach (var requiredFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    // Configure amino acid set
    const int numMaxModsPerProtein = 4;
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var nitrosylC = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
    var nethylmaleimideC = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    //var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    //var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    //var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    //var deamdN = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    //var deamdQ = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        nitrosylC,
        nethylmaleimideC,
        oxM,
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    // searchMode: 0 = all subsequences, 1 = close to N- or C-term, 2 = close to N- and C-term
    const int searchMode = 1;
    // tda: true = target & decoy, false = target, null = decoy
    bool? tda = true;

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, searchMode);
}
/// <summary>
/// Builds a sequence graph for a short annotation and prints the number of proteoform
/// sequences, first per modification combination and then per number of modifications.
/// </summary>
public void TestNumberOfProteoforms()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    //const string annotation = "_.AMCMC._";
    const string annotation = "_.MARTKQTARK._";

    // Configure amino acid set. Only methylK and oxM are active; the remaining locals
    // are kept as toggles for the commented-out list entries below.
    var methylK = new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false);
    var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var carbamidomethylC = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);
    var methylC = new SearchModification(Modification.Methylation, 'C', SequenceLocation.Everywhere, true);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.PeptideNTerm, false);

    var searchModifications = new List<SearchModification>
    {
        //carbamidomethylC,
        //methylC,
        methylK,
        //pyroGluQ,
        oxM,
        //acetylN
    };

    const int numMaxModsPerProtein = 4;
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    var seqGraph = SequenceGraph.CreateGraph(aaSet, annotation);
    var protCompositions = seqGraph.GetSequenceCompositions();
    var modCombs = seqGraph.GetModificationCombinations();

    // Output labels below had several misspellings ("Protoeoforms", "modificaionts",
    // "modificaitons"); they are corrected here.
    Console.WriteLine("\n#Proteoforms by mod combinations: ");
    for (var modIndex = 0; modIndex < protCompositions.Length; modIndex++)
    {
        // Index 0 is reported as the unmodified form.
        Console.Write((modIndex == 0) ? "No modifications" : modCombs[modIndex].ToString());
        Console.Write("\t");
        Console.WriteLine("{0}", seqGraph.GetNumProteoformSequences(modIndex));
    }

    Console.WriteLine("\n#Proteoforms by number of modifications: ");
    for (var nMod = 0; nMod <= numMaxModsPerProtein; nMod++)
    {
        Console.Write("#modifications = {0}", nMod);
        Console.Write("\t");
        Console.WriteLine("{0}", seqGraph.GetNumProteoformSequencesByNumMods(nMod));
    }
}
/// <summary>
/// Builds a sequence graph for histone H4 with ten variable modifications
/// (acetyl, mono/di/tri-methyl, phospho) and prints three size counters of the graph.
/// </summary>
public void TestCreatingHistoneGraph()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    const int maxModsPerProtein = 11;

    // Histone H4
    const string annotation = "_.MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG._";
    // Histone H3.1
    // const string annotation =
    //     "_.MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA._";

    // All modifications are variable (last argument false) and allowed everywhere.
    var modifications = new List<SearchModification>
    {
        new SearchModification(Modification.Acetylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Acetylation, 'K', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Methylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.DiMethylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.DiMethylation, 'K', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.TriMethylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false)
    };
    var aminoAcidSet = new AminoAcidSet(modifications, maxModsPerProtein);

    var histoneGraph = SequenceGraph.CreateGraph(aminoAcidSet, annotation);
    var fragmentCompositionCount = histoneGraph.GetNumFragmentCompositions();
    // NOTE(review): the next two counters come from the same call, so "NumProteoforms" and
    // "NumSequenceCompositions" always print the same value. One of them presumably should
    // use a different SequenceGraph method — confirm which.
    var proteoformCount = histoneGraph.GetNumProteoformCompositions();
    var sequenceCompositionCount = histoneGraph.GetNumProteoformCompositions();

    Console.WriteLine("NumFragmentCompositions: " + fragmentCompositionCount);
    Console.WriteLine("NumProteoforms: " + proteoformCount);
    Console.WriteLine("NumSequenceCompositions: " + sequenceCompositionCount);
}
/// <summary>
/// Runs <c>TestTopDownSearch</c> on the SBEP Salmonella data set located on the shared
/// test-file server. The test is ignored when the spectrum file, the FASTA database,
/// or the output folder is not reachable.
/// </summary>
public void TestForSbepData()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    //// Salmonella
    const string specFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TopDown\SBEP_STM_001_02272012_Aragon.raw";
    const string dbFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_002166_F86E3B2F.fasta";
    const string outputDir = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\Results\Mod_M2";

    // Input files first, then the output folder (same order as the reported messages).
    foreach (var requiredFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    if (!Directory.Exists(outputDir))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, outputDir);
    }

    // Configure amino acid set
    const int numMaxModsPerProtein = 4;
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        oxM,
        acetylN,
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    // searchMode: 0 = all subsequences, 1 = close to N- or C-term, 2 = close to N- and C-term
    const int searchMode = 2;
    // tda: true = target & decoy, false = target, null = decoy
    bool? tda = true;

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, searchMode);
}
/// <summary>
/// For every modification combination of a short annotation, sets the graph sink to that
/// combination, prints the sequence composition with its mass, and prints the score and
/// modifications returned by the graph for a <c>DummyScorer</c> (when a result is returned).
/// </summary>
public void TestGettingSequence()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    //const string annotation = "_.AMCMC._";
    const string annotation = "_.MARTKQTARK._";
    const int maxModsPerProtein = 2;

    // Configure amino acid set. Only methylK and oxM are active; the other locals
    // back the commented-out list entries.
    var methylK = new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false);
    var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var carbamidomethylC = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);
    var methylC = new SearchModification(Modification.Methylation, 'C', SequenceLocation.Everywhere, true);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.PeptideNTerm, false);

    var modifications = new List<SearchModification>
    {
        //carbamidomethylC,
        //methylC,
        methylK,
        //pyroGluQ,
        oxM,
        //acetylN
    };
    var aminoAcidSet = new AminoAcidSet(modifications, maxModsPerProtein);

    var graph = SequenceGraph.CreateGraph(aminoAcidSet, annotation);
    var proteinCompositions = graph.GetSequenceCompositions();

    for (var sinkIndex = 0; sinkIndex < proteinCompositions.Length; sinkIndex++)
    {
        graph.SetSink(sinkIndex);

        var proteinComposition = proteinCompositions[sinkIndex];
        Console.WriteLine("{0}\t{1}", proteinComposition, proteinComposition.Mass);

        var scored = graph.GetFragmentScoreAndModifications(new DummyScorer());
        if (scored == null)
        {
            continue;
        }

        Console.WriteLine("Score: {0}, Modifications: {1}", scored.Item1, scored.Item2);
    }
}
/// <summary>
/// Runs <c>TestTopDownSearch</c> on the SBEP Salmonella data set found under
/// <c>Utils.DEFAULT_TEST_FILE_FOLDER</c>. The test is ignored when the spectrum file,
/// the FASTA database, or the output folder does not exist.
/// </summary>
public void TestForSbepData()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    //// Salmonella
    var specFilePath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"TopDown\SBEP_STM_001_02272012_Aragon.raw");
    var dbFilePath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\ID_002166_F86E3B2F.fasta");
    var outputDir = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"Results\Mod_M2");

    // Input files first, then the output folder (same order as the reported messages).
    foreach (var requiredFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    if (!Directory.Exists(outputDir))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, outputDir);
    }

    // Configure amino acid set
    const int numMaxModsPerProtein = 4;
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        oxM,
        acetylN,
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    // The previous trailing comments described an integer/bool? encoding that no longer
    // matches these enum-typed values.
    // NOTE(review): presumably NoInternalCleavage restricts the search to sequences anchored
    // at both termini, and Both means target and decoy databases — confirm against the enums.
    const InternalCleavageType searchMode = InternalCleavageType.NoInternalCleavage;
    DatabaseSearchMode tda = DatabaseSearchMode.Both;

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, searchMode);
}
/// <summary>
/// Constructs an <c>IcRescorer</c> for a locally stored spectrum file and IC result file,
/// then prints "Done". The test is ignored when either input file is missing.
/// </summary>
public void TestIcRescoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    //const string specFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SpecFiles\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
    //const string icResultPath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_Map07_Re.icdresult";
    //const string outputPath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_Map07_Re_Rescored.icdresult";
    const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1.raw";
    const string icResultPath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1.ictresult";
    const string outputPath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1_Rescored.ictresult";

    // Skip (rather than fail) when an input file is unavailable on this machine.
    foreach (var requiredFile in new[] { specFilePath, icResultPath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    const int numMaxModsPerProtein = 4;
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var nitrosylC = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
    var nethylmaleimideC = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        nitrosylC,
        nethylmaleimideC,
        oxM
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var tolerance = new Tolerance(10.0);

    // The instance is never used afterwards; presumably the constructor itself performs
    // the rescoring and writes outputPath — confirm against IcRescorer.
    var icRescorer = new IcRescorer(specFilePath, icResultPath, outputPath, aaSet, tolerance, 0.7);

    Console.WriteLine("Done");
}
/// <summary>
/// Builds a sequence graph with five variable modifications and prints every sequence
/// composition together with its modification combination, once before and once after
/// calling <c>CleaveNTerm</c> on the graph.
/// </summary>
public void TestGraphWithModifications()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    const string annotation = "_.MIALNKTPQTIVFYKPYGVLCQFTDNSAHPRPTLKDYINLPDLYPVGRLDQDSEGLLLLTSNGKLQHRLAHREFAHQRTYFAQVEGSPTDEDLEPLRRGITFADYPTRPAIAKIITEPDFPPRNPPIRYRASIPTSWLSITLTEGRNRQVRRMTAAVGFPTLRLVRVQIQVTGRSPQQGKGKSAATWCLTLEGLSPGQWRPLTPWEENFCQQLLTGNPNGPWQKKFGDRR._";
    const int maxModsPerProtein = 4;

    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var nitrosylC = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
    var nethylmaleimideC = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

    var modifications = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        nitrosylC,
        nethylmaleimideC,
        oxM
    };
    var aminoAcidSet = new AminoAcidSet(modifications, maxModsPerProtein);
    var graph = SequenceGraph.CreateGraph(aminoAcidSet, annotation);

    // State of the graph for the full annotation.
    var compositionsBefore = graph.GetSequenceCompositions();
    var combinationsBefore = graph.GetModificationCombinations();
    Console.WriteLine("*** Before cleavage: {0}", compositionsBefore.Length);
    for (var i = 0; i < compositionsBefore.Length; i++)
    {
        Console.WriteLine("SequenceComposition: {0}, ModComb: {1}", compositionsBefore[i], combinationsBefore[i]);
    }

    graph.CleaveNTerm();

    // Same report after the N-terminal cleavage; both collections are fetched again.
    var compositionsAfter = graph.GetSequenceCompositions();
    var combinationsAfter = graph.GetModificationCombinations();
    Console.WriteLine("*** After cleavage: {0}", compositionsAfter.Length);
    for (var i = 0; i < compositionsAfter.Length; i++)
    {
        Console.WriteLine("SequenceComposition: {0}, ModComb: {1}", compositionsAfter[i], combinationsAfter[i]);
    }
}
/// <summary>
/// Builds a sequence graph for the peptide "STR" and asserts that the graph's unmodified
/// sequence composition equals the composition the amino acid set computes for the same
/// peptide; then prints all sequence compositions with a running index.
/// </summary>
public void TestSequenceGraph()
{
    ShowStarting(MethodBase.GetCurrentMethod().Name);

    const int numMaxModsPerPeptide = 2;
    const string annotation = "_.STR._";

    // Three variable phosphorylations, variable oxidation, and fixed carbamidomethyl-C.
    var searchModifications = new List<SearchModification>
    {
        new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true)
    };
    //var searchModifications = new List<SearchModification> { phosPhoT, fixCarbamidomethylC };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerPeptide);

    // Strip the two flanking characters on each side ("_." and "._") to get the bare peptide.
    var pepSeq = annotation.Substring(2, annotation.Length - 4);
    Console.WriteLine(aaSet.GetComposition(pepSeq));

    var graph = SequenceGraph.CreateGraph(aaSet, annotation);
    Console.WriteLine(graph.GetUnmodifiedSequenceComposition());
    Assert.AreEqual(graph.GetUnmodifiedSequenceComposition(), aaSet.GetComposition(pepSeq));

    Console.WriteLine("Annotation Compositions:");
    var position = 0;
    foreach (var sequenceComposition in graph.GetSequenceCompositions())
    {
        Console.WriteLine(position + ": " + sequenceComposition);
        position++;
    }

    //const int seqIndex = 1;
    //Console.WriteLine("Fragment Compositions (" + seqIndex +")");
    //var scoringGraph = graph.GetScoringGraph(seqIndex);
    //foreach (var composition in scoringGraph.GetCompositions())
    //{
    //    Console.WriteLine(composition);
    //}
}
/// <summary>
/// Runs <c>TestBottomUpSearch</c> on the QC_Shew Q-Exactive data set stored at hard-coded
/// local paths. The test is ignored when the spectrum file or the FASTA database is missing.
/// </summary>
public void TestQcShewQExactive()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // QC_Shew QE
    const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
    const string dbFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\ID_003456_9B916A8B.fasta";
    const string outputDir = @"C:\cygwin\home\kims336\Data\QCShewQE\Ic_NTT2_Mass";

    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    // Fixed: this guard used to test specFilePath a second time, so a missing database
    // was never detected and the search would fail instead of being skipped.
    if (!File.Exists(dbFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFilePath);
    }

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    //var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    //var deamdN = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    //var deamdQ = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);

    // NOTE(review): the limit is 0 although two variable modifications are listed;
    // presumably none of them can then be applied — confirm this is intended.
    const int numMaxModsPerProtein = 0;
    var searchModifications = new List<SearchModification>
    {
        //carbamidomethylC,
        acetylN,
        oxM
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    // ntt: 0 = all subsequences, 1 = close to N- or C-term, 2 = close to N- and C-term
    const int ntt = 2;
    // tda: true = target & decoy, false = target, null = decoy
    bool? tda = true;

    TestBottomUpSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, ntt);
}
/// <summary>
/// Runs <c>TestBottomUpSearch</c> on a yeast DDA spectrum file with a yeast FASTA database,
/// both at hard-coded local paths. The test is ignored when either file is missing.
/// </summary>
public void TestDdaPlus()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Yeast DDA data (see file names below)
    const string specFilePath = @"H:\Research\DDAPlus\raw\20140701_yeast_DDA_01.raw";
    const string dbFilePath = @"H:\Research\DDAPlus\database\Yeast_SGD_withContam.fasta";
    const string outputDir = @"H:\Research\DDAPlus\Test";

    // Skip (rather than fail) when an input file is unavailable on this machine.
    foreach (var requiredFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    // Configure amino acid set
    const int numMaxModsPerPeptide = 2;
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    //var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    //var deamdN = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    //var deamdQ = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        //carbamidomethylC,
        acetylN,
        oxM
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerPeptide);

    // ntt: 0 = all subsequences, 1 = close to N- or C-term, 2 = close to N- and C-term
    const int ntt = 2;
    // tda: true = target & decoy, false = target, null = decoy
    bool? tda = true;

    TestBottomUpSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, ntt);
}
/// <summary>
/// Runs <c>TestTopDownSearch</c> on the "Aaron" MTB intact data set stored at hard-coded
/// local paths. The test is ignored when the spectrum file or the FASTA database is missing.
/// </summary>
public void TestForAaronData()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownAaron\raw\MTB_intact_1.raw";
    const string dbFilePath = @"C:\cygwin\home\kims336\Data\TopDownAaron\database\ID_003121_998584F8.fasta";
    const string outputDir = @"C:\cygwin\home\kims336\Data\TopDownAaron\Ic\Mode1_07";

    // Skip (rather than fail) when an input file is unavailable on this machine.
    foreach (var requiredFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    // Configure amino acid set
    const int numMaxModsPerProtein = 4;
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    // NOTE(review): this local used to be called tevFp2C although the target residue is 'S';
    // confirm whether the residue or only the old name was off.
    var tevFp2 = new SearchModification(Modification.TevFp2, 'S', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        //glutathioneC,
        //nitrosylC,
        //nethylmaleimideC,
        oxM,
        acetylN,
        tevFp2
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    // searchMode: 0 = all subsequences, 1 = close to N- or C-term, 2 = close to N- and C-term
    const int searchMode = 1;
    // tda: true = target & decoy, false = target, null = decoy
    bool? tda = true;

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, searchMode);
}
/// <summary>
/// Runs <c>TestBottomUpSearch</c> for the given spectrum file against a mouse FASTA database
/// at a hard-coded local path, with carbamidomethyl-C as the only (fixed) modification.
/// The test is ignored when the spectrum file or the database is missing.
/// </summary>
/// <param name="specFilePath">Path of the spectrum file to search.</param>
public void TestMaccoss(string specFilePath)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string dbFilePath = @"D:\Research\Data\UW\QExactive\M_musculus_Uniprot_withContam.fasta";
    const string outputDir = @"D:\Research\Data\UW\QExactive\Ic_NTT1_Mass";

    // The spectrum file is checked first, then the database.
    foreach (var requiredFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    // Configure amino acid set
    const int numMaxModsPerProtein = 0;
    var searchModifications = new List<SearchModification>
    {
        new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true),
        //acetylN,
        //oxM
    };
    //var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    //var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    //var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    //var deamdN = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    //var deamdQ = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    // ntt: 0 = all subsequences, 1 = close to N- or C-term, 2 = close to N- and C-term
    const int ntt = 1;
    // tda: true = target & decoy, false = target, null = decoy
    bool? tda = true;

    TestBottomUpSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, ntt);
}
/// <summary>
/// Runs <c>TestTopDownSearch</c> on the QC_Shew intact data set stored at hard-coded local
/// paths. The test is ignored when the spectrum file or the FASTA database is missing.
/// </summary>
public void TestForQcShew()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // QC_Shew
    const string specFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    const string dbFilePath = @"D:\MSPathFinder\Fasta\ID_002216_235ACCEA.fasta";
    const string outputDir = @"D:\MassSpecFiles\training\test";

    // Skip (rather than fail) when an input file is unavailable on this machine.
    foreach (var requiredFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    // Configure amino acid set
    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification>
    {
        new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false)
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    // searchMode: 0 = all subsequences, 1 = close to N- or C-term, 2 = close to N- and C-term
    const int searchMode = 2;
    // tda: true = target & decoy, false = target, null = decoy
    bool? tda = true;

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, searchMode);
}
/// <summary>
/// Builds a sequence graph with two protein N-terminal modifications (acetylation and
/// pyro-Glu on Q) and prints each sequence composition with its mass, first for the
/// original graph and again after calling <c>CleaveNTerm</c>.
/// </summary>
public void TestNTermMods()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    const string annotation = "_.QARTKQTARK._";
    const int maxModsPerProtein = 2;

    // Configure amino acid set; oxM is declared but currently disabled in the list.
    var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

    var modifications = new List<SearchModification>
    {
        acetylN,
        pyroGluQ,
        //oxM
    };
    var aminoAcidSet = new AminoAcidSet(modifications, maxModsPerProtein);
    //aminoAcidSet.Display();

    var graph = SequenceGraph.CreateGraph(aminoAcidSet, annotation);
    foreach (var before in graph.GetSequenceCompositions())
    {
        Console.WriteLine("{0}\t{1}", before, before.Mass);
    }

    Console.WriteLine("*** Cleave N-term");
    graph.CleaveNTerm();

    foreach (var after in graph.GetSequenceCompositions())
    {
        Console.WriteLine("{0}\t{1}", after, after.Mass);
    }
}
/// <summary>
/// Post-processes an MS-GF+ result file for the immunopeptidomics benchmark with
/// <c>MsGfPostProcessor</c>, using every <c>*.raw</c> file in the benchmark folder, and
/// prints the number returned by <c>PostProcessing</c>. The test is ignored when the
/// result file or the raw-file folder is not available on this machine.
/// </summary>
public void ProcessMhcData()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultPath = @"D:\Research\Data\ImmunoPeptidomics\Benchmarking\IPA\carone_C1309_All.tsv";
    const string outputFilePath = @"D:\Research\Data\ImmunoPeptidomics\Benchmarking\IPA\IPA_Summary.tsv";
    const string rawFileDir = @"D:\Research\Data\ImmunoPeptidomics\Benchmarking\raw";

    // Guard the hard-coded local inputs the same way the sibling tests do;
    // Directory.GetFiles throws when the folder does not exist.
    if (!File.Exists(resultPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultPath);
    }

    if (!Directory.Exists(rawFileDir))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFileDir);
    }

    var specFiles = Directory.GetFiles(rawFileDir, "*.raw");

    // The oxidation modification / AminoAcidSet that used to be built here was never
    // passed to the post-processor, so it has been removed.
    var postProcessor = new MsGfPostProcessor(specFiles, resultPath, new Tolerance(5), new Tolerance(3));
    var numId = postProcessor.PostProcessing(outputFilePath);
    Console.WriteLine("NumId: {0}", numId);
}
/// <summary>
/// Rescores a single proteoform-spectrum match (scan 5448, charge 11) from the QC_Shew
/// intact dataset with <c>InformedTopDownScorer</c> and prints the total score and the
/// number of matched fragments.
/// The test is skipped when the spectrum file is not present, matching the behaviour of
/// the other tests that depend on locally stored data.
/// </summary>
public void TestRescoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;

    const string specFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

    // Without this guard the run loader below is handed a non-existent path.
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    const string sequence = "SKTKHPLPEQWQKNQEAAKATQVAFDLDEKFQYSIRKAALDAGVSPSDQIRTILGLSVSRRPTRPRLTVSLNADDYVQLAEKYDLNADAQLEIKRRVLEDLVRFVAED";
    const int scanNum = 5448;
    const int charge = 11;

    // Configure amino acid set
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        oxM,
        acetylN,
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    // Composition of the residues plus one water.
    var composition = aaSet.GetComposition(sequence) + Composition.H2O;

    var run = PbfLcMsRun.GetLcMsRun(specFilePath, 0, 0);
    var informedScorer = new InformedTopDownScorer(run, aaSet, 1, 15, new Tolerance(10));
    var scores = informedScorer.GetScores(AminoAcid.ProteinNTerm, sequence, AminoAcid.ProteinCTerm, composition, charge, scanNum);

    Console.WriteLine("Total Score = " + scores.Score);
    Console.WriteLine("#Fragments = " + scores.NumMatchedFrags);
}
/// <summary>
/// Rescores one proteoform-spectrum match from the shared PBF test file and verifies
/// that the total score equals <paramref name="expectedScore"/> within 0.0001.
/// </summary>
/// <param name="scanNum">Scan number of the spectrum to score.</param>
/// <param name="charge">Precursor charge passed to the scorer.</param>
/// <param name="sequence">Unmodified protein sequence to score against the spectrum.</param>
/// <param name="expectedScore">Score the scorer is expected to report.</param>
public void TestRescoring(int scanNum, int charge, string sequence, double expectedScore)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var pbfFile = Utils.GetTestFile(methodName, Utils.GetPbfTestFilePath(false));

    // Dynamic modifications, listed in the order they are handed to the amino acid set
    const int maxModsPerProtein = 4;
    var dynamicMods = new List<SearchModification>
    {
        new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false),
    };
    var aminoAcids = new AminoAcidSet(dynamicMods, maxModsPerProtein);

    // Composition of the residues plus one water
    var proteinComposition = aminoAcids.GetComposition(sequence) + Composition.H2O;

    var lcmsRun = PbfLcMsRun.GetLcMsRun(pbfFile.FullName, 0, 0);
    var topDownScorer = new InformedTopDownScorer(lcmsRun, aminoAcids, 1, 15, new Tolerance(10));
    var result = topDownScorer.GetScores(
        AminoAcid.ProteinNTerm,
        sequence,
        AminoAcid.ProteinCTerm,
        proteinComposition,
        charge,
        scanNum);

    Console.WriteLine("Total Score = " + result.Score);
    Console.WriteLine("#Fragments = " + result.NumMatchedFrags);

    Assert.AreEqual(expectedScore, result.Score, 0.0001);
}
/// <summary>
/// Scores every proteoform of a fixed ~43 kDa protein sequence against the MS/MS scans
/// whose precursor matches the proteoform mass, and prints one tab-separated line for
/// each match with a fragment score above 3.
/// The test is skipped when <c>TestRawFilePath</c> does not exist.
/// </summary>
public void Test43KProtein()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    // Candidate dynamic modifications. Not all of them are added to the search list:
    // deamidatedQ and the three phosphorylations are constructed but left out.
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var dethiomethylM = new SearchModification(Modification.Dethiomethyl, 'M', SequenceLocation.Everywhere, false);
    var deamidatedN = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    var deamidatedQ = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);
    var pyroCarbamidomethylC = new SearchModification(Modification.PyroCarbamidomethyl, 'C', SequenceLocation.ProteinNTerm, false);
    var phosphoS = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
    var phosphoT = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
    var phosphoY = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);
    var nitrosylC = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
    var nethylmaleimideC = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;

    // NOTE(review): glutathioneC is listed twice, exactly as in the original test;
    // confirm whether the duplicate entry is intended.
    var activeMods = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        oxM,
        dethiomethylM,
        acetylN,
        deamidatedN,
        glutathioneC,
        pyroCarbamidomethylC,
        nitrosylC,
        nethylmaleimideC
    };
    var aminoAcids = new AminoAcidSet(activeMods, numMaxModsPerProtein);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
    }

    var lcmsRun = PbfLcMsRun.GetLcMsRun(TestRawFilePath);

    const string protSequence = "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";
    const string annotation = "_." + protSequence + "._";

    var graph = SequenceGraph.CreateGraph(aminoAcids, AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (graph == null)
    {
        return;
    }

    var precursorFilter = new SimpleMs1Filter();
    var scorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(lcmsRun);

    // Request a scorer for every scan of interest up front; GetMs2Scorer is queried per scan below.
    foreach (var scanOfInterest in Ms2ScanNums)
    {
        scorerFactory.GetScorer(scanOfInterest);
    }

    // The upper bound of 0 means only the uncleaved sequence is examined.
    for (var numNTermCleavages = 0; numNTermCleavages <= 0; numNTermCleavages++)
    {
        if (numNTermCleavages > 0)
        {
            graph.CleaveNTerm();
        }

        // Residue string and flanking characters depend only on the number of cleavages.
        var cleavedSequence = protSequence.Substring(numNTermCleavages);
        var pre = numNTermCleavages == 0 ? annotation[0] : annotation[numNTermCleavages + 1];
        var post = annotation[annotation.Length - 1];

        var proteoformCount = graph.GetNumProteoformCompositions();
        var allModCombinations = graph.GetModificationCombinations();

        for (var modIndex = 0; modIndex < proteoformCount; modIndex++)
        {
            graph.SetSink(modIndex);

            var compositionWithWater = graph.GetSinkSequenceCompositionWithH2O();
            var proteoformMass = compositionWithWater.Mass;
            var modCombination = allModCombinations[modIndex];

            foreach (var ms2ScanNum in precursorFilter.GetMatchingMs2ScanNums(proteoformMass))
            {
                var productSpectrum = lcmsRun.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                if (productSpectrum == null)
                {
                    continue;
                }

                // Charge estimated from the proteoform mass and the isolation window target m/z
                var protonFreeMz = productSpectrum.IsolationWindow.IsolationWindowTargetMz - Constants.Proton;
                var charge = (int)Math.Round(proteoformMass / protonFreeMz);

                var ms2Scorer = scorerFactory.GetMs2Scorer(ms2ScanNum);
                var score = graph.GetFragmentScore(ms2Scorer);
                if (score <= 3)
                {
                    continue;
                }

                var precursorIon = new Ion(compositionWithWater, charge);

                Console.WriteLine(
                    "{0}.{1}.{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}",
                    pre,
                    cleavedSequence,
                    post,
                    ms2ScanNum,
                    modCombination,
                    precursorIon.GetMostAbundantIsotopeMz(),
                    precursorIon.Charge,
                    precursorIon.Composition.Mass,
                    score);
            }
        }
    }
}
/// <summary>
/// Rescores the target and decoy identifications of the QC_Shew intact dataset:
/// for every row of each result file the informed top-down score is recomputed, a
/// spectral E-value is derived from a generating function built on the deconvoluted
/// spectrum, and the first 15 columns plus the new Score/EValue columns are written
/// to a "_Rescored.tsv" file.
/// The test is skipped when the spectrum file or any of the result files is missing.
/// </summary>
public void TestCompositeScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    const string resultFileFormat = @"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv";
    const string outputFileFormat = @"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv";

    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    // Check every result file before doing any work, so a missing file skips the test
    // up front instead of failing after part of the output has been written.
    var fileExt = new[] { "IcTarget", "IcDecoy" };
    foreach (var ext in fileExt)
    {
        var requiredResultFile = string.Format(resultFileFormat, ext);
        if (!File.Exists(requiredResultFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredResultFile);
        }
    }

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        oxM,
        acetylN
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

    const double filteringWindowSize = 1.1;
    const int isotopeOffsetTolerance = 2;
    var tolerance = new Tolerance(10);
    const int minCharge = 1;
    const int maxCharge = 20;

    var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);

    // Sequences are rebuilt from the modification strings in the result file, so the
    // scorer and sequence parser use a default amino acid set.
    var aminoAcidSet = new AminoAcidSet();
    var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

    foreach (var ext in fileExt)
    {
        var resultFileName = string.Format(resultFileFormat, ext);
        var parser = new TsvFileParser(resultFileName);

        var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
        var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
        var protSequences = parser.GetData("Sequence").ToArray();
        var modStrs = parser.GetData("Modifications").ToArray();
        var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
        var protMass = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

        // Fetch the rows once instead of once per parallel iteration.
        var rows = parser.GetRows();

        var outputFileName = string.Format(outputFileFormat, ext);

        using (var writer = new StreamWriter(outputFileName))
        {
            writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

            // Results are buffered per row so the output keeps the input order.
            var lines = new string[parser.NumData];

            Parallel.For(0, parser.NumData, i =>
            {
                var scan = scans[i];
                var charge = charges[i];
                var protSequence = protSequences[i];
                var modStr = modStrs[i];

                var sequence = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));

                var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;
                Assert.True(ms2Spec != null);

                var scores = scorer.GetScores(sequence, charge, scan);

                var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);
                var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance, comparer);
                var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                var gf = new GeneratingFunction(graph);
                gf.ComputeGeneratingFunction();

                var specEvalue = gf.GetSpectralEValue(scores.Score);

                var items = rows[i].Split('\t');
                var newRowStr = string.Join("\t", items, 0, 15);

                // Each iteration writes only its own slot, so no lock is required.
                lines[i] = string.Format("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
            });

            foreach (var line in lines)
            {
                writer.WriteLine(line);
            }
        }

        Console.WriteLine("Done");
    }
}
/// <summary>
/// Runs a target-only top-down search (search mode 1) on the Alz_RA_C1 HCD dataset with
/// Dehydro (C), Oxidation (M), protein N-terminal Acetylation and S/T/Y Phosphorylation
/// as dynamic modifications. The test is skipped when the spectrum file or the FASTA
/// database cannot be found.
/// </summary>
public void TestForVlad()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string specFilePath = @"D:\Research\Data\Vlad\raw\Alz_RA_C1_HCD_11012013_SW_03Nov2013.raw";
    const string dbFilePath = @"D:\Research\Data\Vlad\database\ID_004221_1C042A1F.fasta";
    const string outputDir = @"D:\Research\Data\Vlad\Ic\POPSICLETest_M1";

    // Inputs are checked in order (spectrum file first); the first missing one triggers the skip.
    foreach (var requiredFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    // Candidate dynamic modifications. Only some of them are placed in the search list;
    // the others are constructed but not used by this search.
    var acetylNTerm = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxidizedMet = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroCys = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var thrToAla = new SearchModification(Modification.ThrToAla, 'T', SequenceLocation.Everywhere, false);
    var dethiomethylM = new SearchModification(Modification.Dethiomethyl, 'M', SequenceLocation.Everywhere, false);
    var deamidatedN = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    var deamidatedQ = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);
    var serToAsn = new SearchModification(Modification.SerToAsn, 'S', SequenceLocation.Everywhere, false);
    var pyroCarbamidomethylC = new SearchModification(Modification.PyroCarbamidomethyl, 'C', SequenceLocation.ProteinNTerm, false);
    var phosphoSer = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
    var phosphoThr = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
    var phosphoTyr = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);

    const int maxModsPerProtein = 4;
    var activeMods = new List<SearchModification>
    {
        dehydroCys,
        oxidizedMet,
        acetylNTerm,
        phosphoSer,
        phosphoThr,
        phosphoTyr
    };
    var aminoAcids = new AminoAcidSet(activeMods, maxModsPerProtein);

    // Search mode: 0 = all subsequences, 1 = close to N- or C-term, 2 = close to N- and C-term
    const int searchMode = 1;

    // Target/decoy analysis: true = target & decoy, false = target only, null = decoy only
    bool? tda = false;

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aminoAcids, tda, searchMode);
}
/// <summary>
/// Scores a single proteoform-spectrum match (scan 19011, charge 7) for every proteoform
/// obtained by allowing one "AddVal" modification on any standard residue, with a fixed
/// protein N-terminal Acetylation. For each proteoform the fast graph-based fragment score
/// and the refined <c>InformedTopDownScorer</c> result are printed.
/// The test is skipped when the spectrum file cannot be found.
/// </summary>
public void TestPrSm()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string specFilePath = @"D:\Research\Data\Jon\AH_SF_mouseliver_3-1_Intact_2_6Feb14_Bane_PL011402.raw";

    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    const int ms2ScanNum = 19011;
    const int charge = 7;
    const string annotation = "_.SKVSFKITLTSDPRLPYKVLSVPESTPFTAVLKFAAEEFKVPAATSAIITNDGIGINPAQTAGNVFLKHGSELRIIPRDRVGSC._";

    // Acetylation is the only fixed modification (last constructor argument is true).
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, true);

    // Custom modification registered under the name "AddVal" and allowed on every standard residue.
    var modVal = Modification.RegisterAndGetModification("AddVal", new Composition(5, 9, 1, 1, 0));
    var searchMods = AminoAcid.StandardAminoAcidCharacters
        .Select(residue => new SearchModification(modVal, residue, SequenceLocation.Everywhere, false))
        .ToList();
    searchMods.Add(acetylN);

    const int numMaxModsPerProtein = 1;
    var aaSet = new AminoAcidSet(searchMods, numMaxModsPerProtein);

    var graph = SequenceGraph.CreateGraph(aaSet, annotation);

    // Other callers in this code base check the created graph for null before using it.
    Assert.NotNull(graph, "Sequence graph is null!");

    var numProteoforms = graph.GetNumProteoformCompositions();
    Console.WriteLine("NumProteoforms: " + numProteoforms);

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);
    var ms2Scorer = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 15);
    ms2Scorer.GetScorer(ms2ScanNum);
    var scorer = ms2Scorer.GetMs2Scorer(ms2ScanNum);
    Assert.NotNull(scorer, "Scorer is null!");

    // Loop-invariant objects are created once rather than once per proteoform.
    var modCombinations = graph.GetModificationCombinations();
    var informedScorer = new InformedTopDownScorer(run, aaSet, 1, 30, new Tolerance(10));
    var sequence = SimpleStringProcessing.GetStringBetweenDots(annotation);

    for (var i = 0; i < numProteoforms; i++)
    {
        // Selecting the sink determines which proteoform the graph scores below.
        graph.SetSink(i);
        Console.WriteLine("ModComb: " + modCombinations[i]);

        var score = graph.GetFragmentScore(scorer);
        Console.WriteLine("Fast search score: " + score);

        var composition = graph.GetSinkSequenceCompositionWithH2O();
        var refinedScore = informedScorer.GetScores(AminoAcid.ProteinNTerm, sequence, AminoAcid.ProteinCTerm, composition, charge, ms2ScanNum);

        Console.WriteLine("Modifications: {0}", refinedScore.Modifications);
        Console.WriteLine("Composition: {0}", composition);
        Console.WriteLine("RefinedScores: {0}", refinedScore);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="FragmentationSequenceViewModel"/> class.
/// Sets up an empty HCD fragmentation sequence, the selectable base ion types (B and Y
/// selected) and neutral losses (only "no loss" selected), the show/hide-all commands,
/// and the reactive subscriptions that keep the selected ion types and labeled ions current.
/// </summary>
public FragmentationSequenceViewModel()
{
    // Start with an empty sequence at charge 1, no LC-MS run, HCD activation.
    var emptySequence = new Sequence(new List<AminoAcid>());
    FragmentationSequence = new FragmentationSequence(emptySequence, 1, null, ActivationMethod.HCD);

    // One view model per base ion type; B and Y ions start out selected.
    var baseIonTypes = BaseIonType.AllBaseIonTypes.Select(
        ionType => new BaseIonTypeViewModel
        {
            BaseIonType = ionType,
            IsSelected = ionType == BaseIonType.B || ionType == BaseIonType.Y
        });
    BaseIonTypes = new ReactiveList<BaseIonTypeViewModel>(baseIonTypes) { ChangeTrackingEnabled = true };

    // Neutral losses; only "no loss" starts out selected.
    NeutralLosses = new ReactiveList<NeutralLossViewModel>
    {
        new NeutralLossViewModel { NeutralLoss = NeutralLoss.NoLoss, IsSelected = true },
        new NeutralLossViewModel { NeutralLoss = NeutralLoss.H2O },
        new NeutralLossViewModel { NeutralLoss = NeutralLoss.NH3 }
    };
    NeutralLosses.ChangeTrackingEnabled = true;

    HeavyModifications = new SearchModification[0];
    LabeledIonViewModels = new LabeledIonViewModel[0];
    SelectedIonTypes = new IonType[0];
    AddPrecursorIons = true;

    // HideAllIonsCommand turns off precursor ions and all base ion types, and clears
    // every neutral loss except "no loss", which keeps its current selection state.
    HideAllIonsCommand = ReactiveCommand.Create(() =>
    {
        AddPrecursorIons = false;

        foreach (var baseIonVm in BaseIonTypes)
        {
            baseIonVm.IsSelected = false;
        }

        foreach (var lossVm in NeutralLosses)
        {
            lossVm.IsSelected = lossVm.NeutralLoss == NeutralLoss.NoLoss && lossVm.IsSelected;
        }
    });

    // Recompute the selected ion types whenever a base ion type is (de)selected...
    var baseIonSelectionChanges = BaseIonTypes.ItemChanged.Where(change => change.PropertyName == "IsSelected");
    baseIonSelectionChanges
        .Select(_ => GetIonTypes())
        .Subscribe(selected => SelectedIonTypes = selected);

    // ...and whenever a neutral loss is (de)selected.
    var neutralLossSelectionChanges = NeutralLosses.ItemChanged.Where(change => change.PropertyName == "IsSelected");
    neutralLossSelectionChanges
        .Select(_ => GetIonTypes())
        .Subscribe(selected => SelectedIonTypes = selected);

    // A new (non-null) fragmentation sequence selects the ion types for its activation method.
    this.WhenAnyValue(vm => vm.FragmentationSequence)
        .Where(sequence => sequence != null)
        .Subscribe(sequence => SetActivationMethod(sequence.ActivationMethod));

    // Any change to the sequence, ion type selection, heavy modifications or the
    // precursor flag triggers recalculation of the labeled ions.
    this.WhenAnyValue(
            vm => vm.FragmentationSequence,
            vm => vm.SelectedIonTypes,
            vm => vm.HeavyModifications,
            vm => vm.AddPrecursorIons)
        .SelectMany(async _ => await GetLabeledIonViewModels())
        .Subscribe(labeledIons => LabeledIonViewModels = labeledIons);

    // SelectAllIonsCommand selects every base ion type and re-enables precursor ions.
    SelectAllIonsCommand = ReactiveCommand.Create(() =>
    {
        foreach (var baseIonVm in BaseIonTypes)
        {
            baseIonVm.IsSelected = true;
        }

        AddPrecursorIons = true;
    });

    // When the configured CID/HCD or ETD ion types change, re-apply the activation method
    // of the current sequence (throttled to 50 ms on the task pool scheduler).
    IcParameters.Instance.WhenAnyValue(settings => settings.CidHcdIonTypes, settings => settings.EtdIonTypes)
        .Throttle(TimeSpan.FromMilliseconds(50), RxApp.TaskpoolScheduler)
        .Where(_ => FragmentationSequence != null)
        .Subscribe(_ => SetActivationMethod(FragmentationSequence.ActivationMethod));
}
/// <summary>
/// Builds the scoring graph and generating function for one spectrum/protein pair from the
/// shared PBF test file, then prints the score range and the spectral E-value for every
/// integer score from 15 up to the maximum score, together with timing information.
/// The test is skipped when the PBF file does not exist and fails with an explicit message
/// when <paramref name="scanNum"/> does not yield a product spectrum.
/// </summary>
/// <param name="scanNum">Scan number of the MS/MS spectrum to evaluate.</param>
/// <param name="protSequence">Unmodified protein sequence used to compute the target mass.</param>
public void TestGetScoreDistribution(int scanNum, string protSequence)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var pbfFilePath = Utils.GetPbfTestFilePath(false);
    var pbfFile = Utils.GetTestFile(methodName, pbfFilePath);

    if (!pbfFile.Exists)
    {
        // Report the full path rather than relying on FileInfo.ToString().
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pbfFile.FullName);
    }

    const string modStr = "";
    const int maxCharge = 20;
    const int minCharge = 1;
    const double filteringWindowSize = 1.1;
    const int isotopeOffsetTolerance = 2;
    var tolerance = new Tolerance(10);
    var run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        oxM,
        acetylN
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

    // Time the construction of the graph factory.
    var stopwatch = Stopwatch.StartNew();
    var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
    stopwatch.Stop();
    Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    // stopwatch2 covers everything from sequence creation to the last E-value.
    var stopwatch2 = Stopwatch.StartNew();

    var sequence = Sequence.CreateSequence(protSequence, modStr, aaSet);
    var proteinMass = sequence.Mass + Composition.H2O.Mass;
    Console.WriteLine("Mass = {0}", proteinMass);

    // The cast yields null when the scan is missing or is not a product spectrum;
    // fail here with a clear message instead of passing null on to the deconvoluter.
    var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
    Assert.NotNull(spectrum, "Could not get a ProductSpectrum for scan {0}", scanNum);

    var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(spectrum, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

    // Time the construction of the scorer and the scoring graph (deconvolution excluded).
    stopwatch.Restart();
    var scorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, spectrum, tolerance, comparer);
    var graph = graphFactory.CreateScoringGraph(scorer, proteinMass);
    stopwatch.Stop();
    Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    // Time the generating function computation.
    stopwatch.Restart();
    var gf = new GeneratingFunction(graph);
    gf.ComputeGeneratingFunction();
    stopwatch.Stop();
    Console.WriteLine(@"computing generation function = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    var scoreDist = gf.GetScoreDistribution();
    Console.WriteLine("{0}-{1}", scoreDist.MinScore, scoreDist.MaxScore);
    Console.WriteLine("{0} : {1}", "score", "specEValue");

    for (var score = 15; score <= gf.MaximumScore; score++)
    {
        var specEvalue = gf.GetSpectralEValue(score);
        Console.WriteLine("{0} : {1}", score, specEvalue);
    }

    stopwatch2.Stop();
    Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", stopwatch2.ElapsedMilliseconds / 1000.0d);
}
/// <summary>
/// Initializes a new instance of the <see cref="SearchModificationViewModel"/> class
/// from an existing search modification.
/// </summary>
/// <param name="searchModification">Search modification stored in the SearchModification property.</param>
/// <param name="dialogService">Dialog service passed on to the single-argument constructor.</param>
public SearchModificationViewModel(SearchModification searchModification, IDialogService dialogService)
    : this(dialogService)
{
    // Runs after the chained constructor has completed.
    this.SearchModification = searchModification;
}
public void TestCompositeScoring() { var methodName = MethodBase.GetCurrentMethod().Name; Utils.ShowStarting(methodName); var pbfFilePath = Utils.GetPbfTestFilePath(false); var pbfFile = Utils.GetTestFile(methodName, pbfFilePath); // Configure amino acid set var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false); var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false); var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false); const int numMaxModsPerProtein = 4; var searchModifications = new List <SearchModification> { dehydroC, oxM, acetylN }; var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein); var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28); var run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName); const double filteringWindowSize = 1.1; const int isotopeOffsetTolerance = 2; var tolerance = new Tolerance(10); const int minCharge = 1; const int maxCharge = 20; var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet); var aminoAcidSet = new AminoAcidSet(); //var scorer = new MatchedPeakPostScorer(tolerance, minCharge, maxCharge); var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance); if (pbfFile.DirectoryName == null) { Assert.Ignore("Ignoring test since cannot determine the parent directory of " + pbfFile.FullName); } var fileExt = new[] { "IcTarget", "IcDecoy" }; foreach (var ext in fileExt) { var resultFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}.tsv", ext); var parser = new TsvFileParser(resultFileName); var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray(); var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray(); var protSequences = parser.GetData("Sequence").ToArray(); var modStrs = parser.GetData("Modifications").ToArray(); var 
compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray(); var protMass = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray(); var outputFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}_Rescored.tsv", ext); using (var writer = new StreamWriter(outputFileName)) { writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue"); var lines = new string[parser.NumData]; //for (var i = 0; i < parser.NumData; i++) Parallel.For(0, 30, i => { var scan = scans[i]; var charge = charges[i]; var protSequence = protSequences[i]; var modStr = modStrs[i]; var sequence = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet); // Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O)); if (!(run.GetSpectrum(scan) is ProductSpectrum ms2Spec)) { Console.WriteLine("Could not get the spectrum datafor scan {0}", scan); }