/// <summary>
/// Creates a sequence graph for the annotation "_.MARTKQTARK._" using methylation on K and
/// oxidation on M (at most two modifications), then prints each sequence composition
/// followed by its mass.
/// </summary>
public void TestBuildingReverseGraph()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string annotation = "_.MARTKQTARK._";
    const int numMaxModsPerProtein = 2;

    // Configure amino acid set
    var mods = new List<SearchModification>
    {
        new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false),
        //new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false)
    };
    var aaSet = new AminoAcidSet(mods, numMaxModsPerProtein);

    var graph = SequenceGraph.CreateGraph(aaSet, annotation);
    var compositions = graph.GetSequenceCompositions();
    foreach (var entry in compositions)
    {
        Console.WriteLine("{0}\t{1}", entry, entry.Mass);
    }
}
/// <summary>
/// Runs TestTopDownSearch on the Alz_RA_C1 raw file against the ID_004221 FASTA database.
/// The test is ignored when either input file does not exist on the current machine.
/// </summary>
public void TestForVlad()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string specFilePath = @"D:\Research\Data\Vlad\raw\Alz_RA_C1_HCD_11012013_SW_03Nov2013.raw";
    const string dbFilePath = @"D:\Research\Data\Vlad\database\ID_004221_1C042A1F.fasta";
    //const string dbFilePath = @"D:\Research\Data\Vlad\database\HBA_MOUSE.fasta";
    const string outputDir = @"D:\Research\Data\Vlad\Ic\POPSICLETest_M1";

    // Assert.Ignore aborts the test, so the first missing file ends the check.
    foreach (var inputFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(inputFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, inputFile);
        }
    }

    // Configure amino acid set.
    // Every candidate is constructed; only the entries left uncommented in the list
    // below are handed to the AminoAcidSet.
    var nTermAcetyl = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var metOx = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var cysDehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysGlutathione = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var thrToAla = new SearchModification(Modification.ThrToAla, 'T', SequenceLocation.Everywhere, false);
    var metDethiomethyl = new SearchModification(Modification.Dethiomethyl, 'M', SequenceLocation.Everywhere, false);
    var asnDeamidated = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    var glnDeamidated = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);
    var serToAsn = new SearchModification(Modification.SerToAsn, 'S', SequenceLocation.Everywhere, false);
    var cysPyroCarbamidomethyl = new SearchModification(Modification.PyroCarbamidomethyl, 'C', SequenceLocation.ProteinNTerm, false);
    var serPhospho = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
    var thrPhospho = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
    var tyrPhospho = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;
    var enabledMods = new List<SearchModification>
    {
        cysDehydro,
        // cysGlutathione,
        metOx,
        // metDethiomethyl,
        nTermAcetyl,
        serPhospho,
        thrPhospho,
        tyrPhospho
        // thrToAla,
        // serToAsn,
        // asnDeamidated,
        // glnDeamidated,
        // cysPyroCarbamidomethyl
    };
    var aaSet = new AminoAcidSet(enabledMods, numMaxModsPerProtein);

    // 0: all subsequences, 1: close to N- or C-term, 2: close to N- and C-term
    const int searchMode = 1;
    // true: target & decoy, false: target, null: decoy
    bool? tda = false;

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, searchMode);
}
/// <summary>
/// Builds a sequence graph for a long protein annotation with a protein-N-terminal
/// PyroGluQ modification (up to six modifications) and prints, for every sequence
/// composition, the composition itself and each fragment composition returned by
/// GetFragmentCompositions(modIndex, 0).
/// </summary>
public void TestBuildingSequenceGraphLongProtein()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    // Configure amino acid set
    const int numMaxModsPerProtein = 6;
    var nTermPyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);
    // NOTE(review): this local was originally named "dehydro" although it wraps
    // Modification.PyroGluQ on 'C' -- confirm which modification was intended before enabling it.
    var pyroGluOnCys = new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false);
    var cysCysteinyl = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var cysGlutathione = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var metOx = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

    // Only the uncommented entries are active; the other locals above are toggles.
    var enabledMods = new List<SearchModification>
    {
        nTermPyroGluQ,
        //pyroGluOnCys,
        //cysCysteinyl,
        //cysGlutathione,
        //metOx
    };
    var aaSet = new AminoAcidSet(enabledMods, numMaxModsPerProtein);

    //const string protAnnotation = "A.HAHLTHQYPAANAQVTAAPQAITLNFSEGVETGFSGAKITGPKNENIKTLPAKRNEQDQKQLIVPLADSLKPGTYTVDWHVVSVDGHKTKGHYTFSVK.-";
    //const string protAnnotation = "_.QQ._";
    const string protAnnotation =
        "_.MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAALRKLMMNHQ._";

    var graph = SequenceGraph.CreateGraph(aaSet, protAnnotation);
    var sequenceCompositions = graph.GetSequenceCompositions();

    for (var modIndex = 0; modIndex < sequenceCompositions.Length; modIndex++)
    {
        var wholeSequence = sequenceCompositions[modIndex];
        Console.WriteLine("SequenceComposition: {0}", wholeSequence);

        var fragments = graph.GetFragmentCompositions(modIndex, 0);
        foreach (var fragment in fragments)
        {
            // The mass comparison that used to gate this line is disabled, so every
            // fragment composition is printed.
            //if (fragment.GetMass() > wholeSequence.GetMass())
            Console.WriteLine("***Seq: {0}, Frag: {1}", wholeSequence, fragment);
        }
    }
}
/// <summary>
/// Rescores one protein sequence against scan 5448 (charge 11) of a PBF file with
/// InformedTopDownScorer and prints the total score and the number of matched fragments.
/// The test is ignored when the PBF file is not present, matching the other
/// data-dependent tests in this file; previously it failed with an exception instead.
/// </summary>
public void TestRescoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    //const string specFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
    const string specFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

    // The spectrum file lives on a developer machine; skip instead of failing elsewhere.
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    //const string sequence = "SGWYELSKSSNDQFKFVLKAGNGEVILTSELYTGKSGAMNGIESVQTNSPIEARYAKEVAKNDKPYFNLKAANHQIIGTSQMYSSTA";
    //const int scanNum = 4084;
    const string sequence = "SKTKHPLPEQWQKNQEAAKATQVAFDLDEKFQYSIRKAALDAGVSPSDQIRTILGLSVSRRPTRPRLTVSLNADDYVQLAEKYDLNADAQLEIKRRVLEDLVRFVAED";
    const int scanNum = 5448;
    const int charge = 11;

    // Configure amino acid set
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        oxM,
        acetylN,
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    // Water is added to the residue composition before scoring.
    var composition = aaSet.GetComposition(sequence) + Composition.H2O;

    var run = PbfLcMsRun.GetLcMsRun(specFilePath, 0, 0);
    var informedScorer = new InformedTopDownScorer(run, aaSet, 1, 15, new Tolerance(10));
    var scores = informedScorer.GetScores(AminoAcid.ProteinNTerm, sequence, AminoAcid.ProteinCTerm, composition, charge, scanNum);

    Console.WriteLine("Total Score = " + scores.Score);
    Console.WriteLine("#Fragments = " + scores.NumMatchedFrags);
}
/// <summary>
/// Walks every annotation returned by IntactSequenceAnnotationsAndOffsets for the E. coli
/// FASTA database, builds a sequence graph for it (with zero and one N-terminal cleavage),
/// and prints the TopDownScorer score of every resulting protein composition, followed by
/// summary counts and the elapsed time.
/// The test is ignored when the database or the raw file is missing.
/// </summary>
public void TestTopDownScoringForAllXics()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Search parameters (trailing comments: alternative values kept from the original)
    const int numNTermCleavages = 1;  // 30
    const int minLength = 7;
    const int maxLength = 1000;
    //const int minCharge = 5; // 3
    //const int maxCharge = 15; // 67
    const int numMaxModsPerProtein = 0; // 6
    var precursorTolerance = new Tolerance(10);

    const string dbFilePath = @"..\..\..\TestFiles\sprot.Ecoli.2012_07.fasta";
    //const string dbFilePath = @"..\..\..\TestFiles\sprot.Ecoli.2012_07.icdecoy.KR.fasta";
    //const string dbFilePath = @"..\..\..\TestFiles\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";
    //const string dbFilePath = @"C:\cygwin\home\kims336\Data\TopDown\ID_003558_56D73071.fasta";
    const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";

    // Skip, as the sibling data-dependent tests do, instead of failing on machines
    // that do not have the input files.
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    if (!File.Exists(dbFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFilePath);
    }

    var sw = new System.Diagnostics.Stopwatch();

    sw.Start();
    Console.Write("Reading raw file...");
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);
    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);

    // Configure amino acid set
    // Fixed: "dehydro" was built from Modification.PyroGluQ, which matched neither the
    // variable name nor the Dehydro-on-C definition used by the other tests in this file.
    var dehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        dehydro,
        cysteinylC,
        glutathioneC,
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    var targetDb = new FastaDatabase(dbFilePath);
    var indexedDb = new IndexedDatabase(targetDb);

    var numProteins = 0;
    long totalProtCompositions = 0;

    // NOTE: these are static settings and therefore remain in effect after this test.
    TopDownScorer.MaxCharge = 25;
    TopDownScorer.MinCharge = 8;

    sw.Reset();
    sw.Start();
    Console.WriteLine("Generating XICs...");

    foreach (var protAnnotationAndOffset in indexedDb.IntactSequenceAnnotationsAndOffsets(minLength, maxLength))
    {
        ++numProteins;
        //if (numProteins > 2000) break;

        // Progress report every 1000 proteins.
        if (numProteins % 1000 == 0)
        {
            Console.WriteLine("Processed {0} proteins", numProteins);
        }

        var seqGraph = SequenceGraph.CreateGraph(aaSet, protAnnotationAndOffset.Annotation);
        if (seqGraph == null)
        {
            continue;
        }

        for (var nTermCleavages = 0; nTermCleavages <= numNTermCleavages; nTermCleavages++)
        {
            // The first pass scores the full sequence; each later pass removes one
            // more N-terminal residue from the same graph.
            if (nTermCleavages > 0)
            {
                seqGraph.CleaveNTerm();
            }

            foreach (var protComposition in seqGraph.GetSequenceCompositions())
            {
                totalProtCompositions++;
                var scorer = new TopDownScorer(protComposition, run, precursorTolerance, null);
                Console.WriteLine(scorer.GetScore());
            }
        }
    }

    sw.Stop();
    Console.WriteLine("NumProteins: {0}", numProteins);
    Console.WriteLine("NumProteinCompositions: {0}", totalProtCompositions);
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
}
/// <summary>
/// Builds a sequence graph for "_.STR._" with phosphorylation on S/T/Y, oxidation on M and
/// carbamidomethylation on C (at most two modifications), asserts that the graph's
/// unmodified sequence composition equals the amino-acid-set composition of the bare
/// sequence, and prints every sequence composition with its index.
/// </summary>
public void TestSequenceGraph()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    ShowStarting(testName);

    const int numMaxModsPepPeptide = 2;
    const string annotation = "_.STR._";

    var mods = new List<SearchModification>
    {
        new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true)
    };
    var aaSet = new AminoAcidSet(mods, numMaxModsPepPeptide);

    // Drop the two flanking characters on each side of the annotation ("_." and "._").
    var pepSeq = annotation.Substring(2, annotation.Length - 4);
    Console.WriteLine(aaSet.GetComposition(pepSeq));

    var graph = SequenceGraph.CreateGraph(aaSet, annotation);
    Console.WriteLine(graph.GetUnmodifiedSequenceComposition());
    Assert.AreEqual(graph.GetUnmodifiedSequenceComposition(), aaSet.GetComposition(pepSeq));

    Console.WriteLine("Annotation Compositions:");
    var compositions = graph.GetSequenceCompositions();
    for (var i = 0; i < compositions.Length; i++)
    {
        Console.WriteLine(i + ": " + compositions[i]);
    }
}
/// <summary>
/// Reads an MSAlign+ result table line by line, scores the annotation found in column 13
/// of each row with TopDownScorer, and writes every input line with an extra "Score"
/// column to "&lt;input path&gt;.txt". Rows whose annotation cannot be turned into a sequence
/// graph, or that have too few columns, get "N/A" as their score.
/// The test is ignored when the raw file or the result table is missing.
/// </summary>
public void TestMsAlignPlusResults()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";
    const string msAlignPlusResultPath = @"C:\workspace\TopDown\E_coli_iscU_60_mock_MSAlign_ResultTable_sam.txt";

    // Zero-based index of the tab-separated column that holds the annotation.
    const int annotationColumn = 13;

    // Skip, as the sibling data-dependent tests do, when the inputs are unavailable.
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    if (!File.Exists(msAlignPlusResultPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, msAlignPlusResultPath);
    }

    // NOTE: these are static settings and therefore remain in effect after this test.
    TopDownScorer.MaxCharge = 25;
    TopDownScorer.MinCharge = 8;

    // Fixed: "dehydro" was built from Modification.PyroGluQ, which matched neither the
    // variable name nor the Dehydro-on-C definition used by the other tests in this file.
    var dehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        dehydro,
        cysteinylC,
        glutathioneC,
    };
    var aaSet = new AminoAcidSet(searchModifications, 0);

    var precursorTolerance = new Tolerance(10);
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

    // "using" guarantees both streams are closed even if scoring throws part-way through.
    using (var reader = new StreamReader(msAlignPlusResultPath))
    using (var writer = new StreamWriter(msAlignPlusResultPath + ".txt"))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Header row: append the new column title.
            // NOTE(review): the prefix ends with a space as written; if the header is
            // tab-delimited it will not match here -- confirm against a real result file.
            if (line.StartsWith("Data_file_name ", StringComparison.Ordinal))
            {
                writer.WriteLine(line + "\tScore");
                continue;
            }

            var token = line.Split('\t');
            if (token.Length <= annotationColumn)
            {
                // Blank or truncated row: there is no annotation to score.
                writer.WriteLine(line + "\tN/A");
                continue;
            }

            var seqGraph = SequenceGraph.CreateGraph(aaSet, token[annotationColumn]);
            if (seqGraph == null)
            {
                writer.WriteLine(line + "\tN/A");
                continue;
            }

            // Only the first sequence composition is scored.
            var protCompositions = seqGraph.GetSequenceCompositions();
            var scorer = new TopDownScorer(protCompositions[0], run, precursorTolerance, null);
            var score = scorer.GetScore();

            writer.WriteLine(line + "\t" + score);
            Console.WriteLine(score);
        }
    }
}
/// <summary>
/// Builds a sequence graph for a single protein annotation and, for each sequence
/// composition, prints the composition, its most abundant isotope index, the m/z of that
/// isotope at charge 11, the TopDownScorer score, and a charge/m-z table for charges 9-18.
/// The test is ignored when the raw file is missing.
/// </summary>
public void TestTopDownScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";
    const string protAnnotation = "A.AHAHLTHQYPAANAQVTAAPQAITLNFSEGVETGFSGAKITGPKNENIKTLPAKRNEQDQKQLIVPLADSLKPGTYTVDWHVVSVDGHKTKGHYTFSVK.";

    // Skip, as the sibling data-dependent tests do, when the raw file is unavailable.
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    // NOTE: these are static settings and therefore remain in effect after this test.
    TopDownScorer.MaxCharge = 25;
    TopDownScorer.MinCharge = 8;

    // Fixed: "dehydro" was built from Modification.PyroGluQ, which matched neither the
    // variable name nor the Dehydro-on-C definition used by the other tests in this file.
    var dehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        dehydro,
        cysteinylC,
        glutathioneC,
    };
    var aaSet = new AminoAcidSet(searchModifications, 0);
    var precursorTolerance = new Tolerance(10);

    // Create a sequence graph
    var seqGraph = SequenceGraph.CreateGraph(aaSet, protAnnotation);

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

    foreach (var protComposition in seqGraph.GetSequenceCompositions())
    {
        var mostAbundantIsotopeIndex = protComposition.GetMostAbundantIsotopeZeroBasedIndex();
        Console.WriteLine("Composition\t{0}", protComposition);
        Console.WriteLine("MostAbundantIsotopeIndex\t{0}", mostAbundantIsotopeIndex);
        Console.WriteLine(new Ion(protComposition + Composition.H2O, 11).GetIsotopeMz(mostAbundantIsotopeIndex));
        Console.WriteLine();

        var scorer = new TopDownScorer(protComposition, run, precursorTolerance, null);
        Console.WriteLine(scorer.GetScore());

        // m/z of the most abundant isotope for a fixed range of precursor charges.
        Console.WriteLine("\nCharge\tm/z");
        for (var charge = 9; charge <= 18; charge++)
        {
            var precursorIon = new Ion(protComposition + Composition.H2O, charge);
            Console.WriteLine("{0}\t{1}", charge, precursorIon.GetIsotopeMz(mostAbundantIsotopeIndex));
        }
    }
}
/// <summary>
/// Runs TestTopDownSearch (target and decoy, search mode 2) on the SBEP_STM raw file from
/// the shared unit-test folder. The test is ignored when the raw file, the FASTA file or
/// the output folder cannot be found.
/// </summary>
public void TestForSbepData()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    //// Salmonella
    const string specFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TopDown\SBEP_STM_001_02272012_Aragon.raw";
    const string dbFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_002166_F86E3B2F.fasta";
    const string outputDir = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\Results\Mod_M2";

    // Assert.Ignore aborts the test, so the first missing input ends the check.
    foreach (var inputFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(inputFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, inputFile);
        }
    }

    if (!Directory.Exists(outputDir))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", testName, outputDir);
    }

    // Configure amino acid set
    var nTermAcetyl = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var metOx = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var cysDehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysGlutathione = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;
    var enabledMods = new List<SearchModification>
    {
        cysDehydro,
        cysGlutathione,
        metOx,
        nTermAcetyl,
    };
    var aaSet = new AminoAcidSet(enabledMods, numMaxModsPerProtein);

    // 0: all subsequences, 1: close to N- or C-term, 2: close to N- and C-term
    const int searchMode = 2;
    // true: target & decoy, false: target, null: decoy
    bool? tda = true;

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, searchMode);
}
/// <summary>
/// Runs TestBottomUpSearch (target and decoy, ntt = 2) on the 20140701 yeast DDA raw file
/// with N-terminal acetylation and methionine oxidation, allowing at most two
/// modifications. The test is ignored when the raw file or the FASTA file is missing.
/// </summary>
public void TestDdaPlus()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string specFilePath = @"H:\Research\DDAPlus\raw\20140701_yeast_DDA_01.raw";
    const string dbFilePath = @"H:\Research\DDAPlus\database\Yeast_SGD_withContam.fasta";
    const string outputDir = @"H:\Research\DDAPlus\Test";

    // Assert.Ignore aborts the test, so the first missing file ends the check.
    foreach (var inputFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(inputFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, inputFile);
        }
    }

    // Configure amino acid set
    var metOx = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var nTermAcetyl = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    //var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    //var deamdN = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    //var deamdQ = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);

    const int numMaxModsPerPeptide = 2;
    var enabledMods = new List<SearchModification>
    {
        nTermAcetyl,
        metOx
    };
    var aaSet = new AminoAcidSet(enabledMods, numMaxModsPerPeptide);

    // NOTE(review): the comment originally attached to ntt described the top-down search
    // modes (0/1/2); presumably ntt is the number of tolerable termini -- confirm against
    // TestBottomUpSearch.
    const int ntt = 2;
    // true: target & decoy, false: target, null: decoy
    bool? tda = true;

    TestBottomUpSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, ntt);
}
/// <summary>
/// Builds a sequence graph for "_.MARTKQTARK._" with methylation on K and oxidation on M
/// (at most two modifications). For each sequence composition it selects the matching
/// sink, prints the composition and its mass, and prints the result of
/// GetFragmentScoreAndModifications with a DummyScorer when that result is not null.
/// </summary>
public void TestGettingSequence()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    //const string annotation = "_.AMCMC._";
    const string annotation = "_.MARTKQTARK._";

    // Configure amino acid set.
    // All candidates are constructed; only the uncommented list entries are active.
    var lysMethyl = new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false);
    var glnPyroGlu = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    var metOx = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var cysCarbamidomethyl = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);
    var cysMethyl = new SearchModification(Modification.Methylation, 'C', SequenceLocation.Everywhere, true);
    var nTermAcetyl = new SearchModification(Modification.Acetylation, '*', SequenceLocation.PeptideNTerm, false);

    var enabledMods = new List<SearchModification>
    {
        //cysCarbamidomethyl,
        //cysMethyl,
        lysMethyl,
        //glnPyroGlu,
        metOx,
        //nTermAcetyl
    };

    const int numMaxModsPerProtein = 2;
    var aaSet = new AminoAcidSet(enabledMods, numMaxModsPerProtein);

    var graph = SequenceGraph.CreateGraph(aaSet, annotation);
    var proteinCompositions = graph.GetSequenceCompositions();

    for (var modIndex = 0; modIndex < proteinCompositions.Length; modIndex++)
    {
        // The sink must be set before the graph is scored for this composition.
        graph.SetSink(modIndex);

        var current = proteinCompositions[modIndex];
        Console.WriteLine("{0}\t{1}", current, current.Mass);

        var scoreAndMods = graph.GetFragmentScoreAndModifications(new DummyScorer());
        if (scoreAndMods == null)
        {
            continue;
        }

        Console.WriteLine("Score: {0}, Modifications: {1}", scoreAndMods.Item1, scoreAndMods.Item2);
    }
}
/// <summary>
/// Runs TestTopDownSearch (target and decoy, search mode 1) on the MTB_intact_1 raw file
/// with Dehydro on C, oxidation on M, protein-N-terminal acetylation and TevFp2 on S.
/// The test is ignored when the raw file or the FASTA file is missing.
/// </summary>
public void TestForAaronData()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownAaron\raw\MTB_intact_1.raw";
    const string dbFilePath = @"C:\cygwin\home\kims336\Data\TopDownAaron\database\ID_003121_998584F8.fasta";
    const string outputDir = @"C:\cygwin\home\kims336\Data\TopDownAaron\Ic\Mode1_07";

    // Assert.Ignore aborts the test, so the first missing file ends the check.
    foreach (var inputFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(inputFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, inputFile);
        }
    }

    // Configure amino acid set
    var nTermAcetyl = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var metOx = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var cysDehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    // NOTE(review): this local was originally named "tevFp2C" although the target residue
    // is 'S' -- confirm which residue was intended.
    var serTevFp2 = new SearchModification(Modification.TevFp2, 'S', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;
    var enabledMods = new List<SearchModification>
    {
        cysDehydro,
        metOx,
        nTermAcetyl,
        serTevFp2
    };
    var aaSet = new AminoAcidSet(enabledMods, numMaxModsPerProtein);

    // 0: all subsequences, 1: close to N- or C-term, 2: close to N- and C-term
    const int searchMode = 1;
    // true: target & decoy, false: target, null: decoy
    bool? tda = true;

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, searchMode);
}
/// <summary>
/// Creates an AminoAcidSet from protein-N-terminal acetylation, peptide-N-terminal
/// PyroGluQ and methionine oxidation (at most two modifications) and calls Display() on it.
/// </summary>
public void TestCreatingAminoAcidSet()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const int numMaxModsPerProtein = 2;

    // Configure amino acid set
    var glnPyroGlu = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.PeptideNTerm, false);
    var nTermAcetyl = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var metOx = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

    var enabledMods = new List<SearchModification>
    {
        nTermAcetyl,
        glnPyroGlu,
        metOx
    };

    var aaSet = new AminoAcidSet(enabledMods, numMaxModsPerProtein);
    //var aaSet = new AminoAcidSet(Modification.Carbamidomethylation);
    aaSet.Display();
}
/// <summary>
/// Builds a sequence graph for "_.QARTKQTARK._" with protein-N-terminal acetylation and
/// PyroGluQ (at most two modifications), prints every sequence composition with its mass,
/// then calls CleaveNTerm() and prints the compositions again.
/// </summary>
public void TestNTermMods()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string annotation = "_.QARTKQTARK._";
    const int numMaxModsPerProtein = 2;

    // Configure amino acid set
    var glnPyroGlu = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);
    var nTermAcetyl = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var metOx = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

    var enabledMods = new List<SearchModification>
    {
        nTermAcetyl,
        glnPyroGlu,
        //metOx
    };
    var aaSet = new AminoAcidSet(enabledMods, numMaxModsPerProtein);
    //aaSet.Display();

    var graph = SequenceGraph.CreateGraph(aaSet, annotation);

    // Compositions before cleavage
    var beforeCleavage = graph.GetSequenceCompositions();
    foreach (var entry in beforeCleavage)
    {
        Console.WriteLine("{0}\t{1}", entry, entry.Mass);
    }

    Console.WriteLine("*** Cleave N-term");
    graph.CleaveNTerm();

    // Compositions after cleavage (queried again from the graph)
    var afterCleavage = graph.GetSequenceCompositions();
    foreach (var entry in afterCleavage)
    {
        Console.WriteLine("{0}\t{1}", entry, entry.Mass);
    }
}
/// <summary>
/// Scores one fixed protein sequence against the MS2 scans of TestRawFilePath.
/// For each proteoform composition of the sequence graph, every MS2 scan returned by
/// SimpleMs1Filter for the proteoform mass is scored with the deconvoluted-spectrum
/// scorer; matches with a score above 3 are printed as one tab-separated line.
/// The test is ignored when TestRawFilePath does not exist and returns silently when no
/// sequence graph can be created.
/// </summary>
public void Test43KProtein()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    // Configure amino acid set.
    // All candidates are constructed; only the uncommented list entries are active.
    var nTermAcetyl = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var metOx = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var cysDehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysGlutathione = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var metDethiomethyl = new SearchModification(Modification.Dethiomethyl, 'M', SequenceLocation.Everywhere, false);
    var asnDeamidated = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    var glnDeamidated = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);
    var cysPyroCarbamidomethyl = new SearchModification(Modification.PyroCarbamidomethyl, 'C', SequenceLocation.ProteinNTerm, false);
    var serPhospho = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
    var thrPhospho = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
    var tyrPhospho = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);
    var cysNitrosyl = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
    var cysNethylmaleimide = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;
    var enabledMods = new List<SearchModification>
    {
        cysDehydro,
        cysGlutathione,
        metOx,
        metDethiomethyl,
        nTermAcetyl,
        //serPhospho,
        //thrPhospho,
        //tyrPhospho,
        asnDeamidated,
        // glnDeamidated,
        // NOTE(review): glutathione on C is listed a second time here, as in the original;
        // presumably unintentional -- confirm before removing.
        cysGlutathione,
        cysPyroCarbamidomethyl,
        cysNitrosyl,
        cysNethylmaleimide
    };
    var aaSet = new AminoAcidSet(enabledMods, numMaxModsPerProtein);
    // var aaSet = new AminoAcidSet();

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + testName + @" since file not found: " + TestRawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath);

    const string protSequence = "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";
    const string annotation = "_." + protSequence + "._";

    var graph = SequenceGraph.CreateGraph(aaSet, AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (graph == null)
    {
        return;
    }

    var ms1Filter = new SimpleMs1Filter();
    var scorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(run);

    // Request a scorer for every known MS2 scan up front; the return value is discarded.
    foreach (var scanNum in Ms2ScanNums)
    {
        scorerFactory.GetScorer(scanNum);
    }

    // The upper bound is 0, so only the uncleaved sequence is examined.
    for (var numNTermCleavages = 0; numNTermCleavages <= 0; numNTermCleavages++)
    {
        if (numNTermCleavages > 0)
        {
            graph.CleaveNTerm();
        }

        var proteoformCount = graph.GetNumProteoformCompositions();
        var allModCombinations = graph.GetModificationCombinations();

        for (var modIndex = 0; modIndex < proteoformCount; modIndex++)
        {
            // The sink must be selected before the sink composition is read or scored.
            graph.SetSink(modIndex);
            var compositionWithWater = graph.GetSinkSequenceCompositionWithH2O();
            var sequenceMass = compositionWithWater.Mass;
            var modCombination = allModCombinations[modIndex];

            var candidateScans = ms1Filter.GetMatchingMs2ScanNums(sequenceMass);
            foreach (var scanNum in candidateScans)
            {
                var productSpectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
                if (productSpectrum == null)
                {
                    continue;
                }

                // Charge estimated from the sequence mass and the isolation window target m/z.
                var targetMz = productSpectrum.IsolationWindow.IsolationWindowTargetMz;
                var charge = (int)Math.Round(sequenceMass / (targetMz - Constants.Proton));

                var ms2Scorer = scorerFactory.GetMs2Scorer(scanNum);
                var score = graph.GetFragmentScore(ms2Scorer);
                if (score <= 3)
                {
                    continue;
                }

                var precursorIon = new Ion(compositionWithWater, charge);
                var sequence = protSequence.Substring(numNTermCleavages);

                // Flanking residues for the printed annotation.
                char pre;
                if (numNTermCleavages == 0)
                {
                    pre = annotation[0];
                }
                else
                {
                    pre = annotation[numNTermCleavages + 1];
                }
                var post = annotation[annotation.Length - 1];

                Console.WriteLine("{0}.{1}.{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}",
                    pre, sequence, post,
                    scanNum,
                    modCombination,
                    precursorIon.GetMostAbundantIsotopeMz(),
                    precursorIon.Charge,
                    precursorIon.Composition.Mass,
                    score);
            }
        }
    }
}
/// <summary>
/// Constructs an IcRescorer for the Synocho_D1_1 raw file and its .ictresult file, with a
/// rescored output path, a tolerance of 10.0 and a final argument of 0.7, then prints "Done".
/// The test is ignored when the raw file or the result file is missing.
/// </summary>
public void TestIcRescoring()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    //const string specFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SpecFiles\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
    //const string icResultPath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_Map07_Re.icdresult";
    //const string outputPath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_Map07_Re_Rescored.icdresult";

    const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1.raw";
    const string icResultPath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1.ictresult";
    const string outputPath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1_Rescored.ictresult";

    // Assert.Ignore aborts the test, so the first missing file ends the check.
    foreach (var inputFile in new[] { specFilePath, icResultPath })
    {
        if (!File.Exists(inputFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, inputFile);
        }
    }

    var metOx = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var cysDehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysGlutathione = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var cysNitrosyl = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
    var cysNethylmaleimide = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;
    var enabledMods = new List<SearchModification>
    {
        cysDehydro,
        cysGlutathione,
        cysNitrosyl,
        cysNethylmaleimide,
        metOx
    };
    var aaSet = new AminoAcidSet(enabledMods, numMaxModsPerProtein);
    var tolerance = new Tolerance(10.0);

    // The instance is never used afterwards; the test only constructs it.
    // NOTE(review): presumably the constructor performs the rescoring and writes outputPath -- confirm.
    var icRescorer = new IcRescorer(specFilePath, icResultPath, outputPath, aaSet, tolerance, 0.7);

    Console.WriteLine("Done");
}
/// <summary>
/// Runs <c>TestTopDownSearch</c> on a local spectrum file and FASTA database with oxidation (M) and four
/// cysteine modifications (at most 4 per protein), search mode 1, target and decoy.
/// The test is ignored when the spectrum file or the database file is missing.
/// </summary>
public void TestForJiaData()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    // QC_Shew
    //const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
    //const string dbFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_002216_235ACCEA.fasta";
    //const string dbFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\database\Test.fasta";

    // Jia's data
    const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1.raw";
    const string dbFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\database\ID_003962_71E1A1D4.fasta";
    const string outputDir = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\D1_1_Mode1";

    // Skip (rather than fail) when the local input data is unavailable.
    foreach (var requiredFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, requiredFile);
        }
    }

    // Configure amino acid set
    const int numMaxModsPerProtein = 4;
    var modifications = new List<SearchModification>
    {
        new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
        // Currently disabled:
        //new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false),
        //new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false),
        //new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false),
        //new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false),
        //new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false),
    };
    var aminoAcids = new AminoAcidSet(modifications, numMaxModsPerProtein);

    const int searchMode = 1;   // 0: all subsequences, 1: close to N- or C-term, 2: close to N- and C-term
    bool? targetDecoy = true;   // true: target & decoy, false: target, null: decoy

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aminoAcids, targetDecoy, searchMode);
}
/// <summary>
/// Scores one protein annotation against a single MS2 scan (scan 19011, charge 7) for every proteoform
/// composition of its sequence graph, printing the graph fragment score and the scores returned by
/// <c>InformedTopDownScorer</c>. The amino acid set allows one custom "AddVal" modification on any
/// standard residue plus a fixed protein N-terminal acetylation.
/// The test is ignored when the spectrum file is missing.
/// </summary>
public void TestPrSm()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    // Alternative data set (Yufeng):
    //const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownYufeng\raw\yufeng_column_test2.raw";
    //const string annotation =
    //    "_.MKTKLSVLSAAMLAATLTMMPAVSQAAIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVG" +
    //    "LHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTV" +
    //    "TSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVG" +
    //    "IGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGS" +
    //    "AAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEA" +
    //    "NQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDL" +
    //    "KSLKELLKDQEGAVALKIVRGKSMLYLVLR._";
    //var aaSet = new AminoAcidSet();
    //const int charge = 60;
    //const int ms2ScanNum = 46661;

    const string specFilePath = @"D:\Research\Data\Jon\AH_SF_mouseliver_3-1_Intact_2_6Feb14_Bane_PL011402.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
    }

    const int ms2ScanNum = 19011;
    const int charge = 7;
    const string annotation = "_.SKVSFKITLTSDPRLPYKVLSVPESTPFTAVLKFAAEEFKVPAATSAIITNDGIGINPAQTAGNVFLKHGSELRIIPRDRVGSC._";

    // One "AddVal" search modification per standard residue, followed by N-terminal acetylation.
    var addVal = Modification.RegisterAndGetModification("AddVal", new Composition(5, 9, 1, 1, 0));
    var searchMods = new List<SearchModification>();
    foreach (var residue in AminoAcid.StandardAminoAcidCharacters)
    {
        searchMods.Add(new SearchModification(addVal, residue, SequenceLocation.Everywhere, false));
    }
    searchMods.Add(new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, true));

    const int numMaxModsPerProtein = 1;
    var aminoAcids = new AminoAcidSet(searchMods, numMaxModsPerProtein);

    var seqGraph = SequenceGraph.CreateGraph(aminoAcids, annotation);
    Console.WriteLine("NumProteoforms: " + seqGraph.GetNumProteoformCompositions());

    var lcMsRun = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);
    var productScorer = new ProductScorerBasedOnDeconvolutedSpectra(lcMsRun, 1, 15);

    // NOTE(review): the result of GetScorer is discarded; it is presumably called for a side effect
    // that GetMs2Scorer relies on - confirm before removing.
    productScorer.GetScorer(ms2ScanNum);
    var fragmentScorer = productScorer.GetMs2Scorer(ms2ScanNum);
    Assert.NotNull(fragmentScorer, "Scorer is null!");

    var sinkIndex = 0;
    while (sinkIndex < seqGraph.GetNumProteoformCompositions())
    {
        // Select the proteoform (modification combination) to evaluate.
        seqGraph.SetSink(sinkIndex);
        Console.WriteLine("ModComb: " + seqGraph.GetModificationCombinations()[sinkIndex]);

        var fastScore = seqGraph.GetFragmentScore(fragmentScorer);
        Console.WriteLine("Fast search score: " + fastScore);

        var sinkComposition = seqGraph.GetSinkSequenceCompositionWithH2O();
        var topDownScorer = new InformedTopDownScorer(lcMsRun, aminoAcids, 1, 30, new Tolerance(10));
        var refined = topDownScorer.GetScores(
            AminoAcid.ProteinNTerm,
            SimpleStringProcessing.GetStringBetweenDots(annotation),
            AminoAcid.ProteinCTerm,
            sinkComposition,
            charge,
            ms2ScanNum);

        Console.WriteLine("Modifications: {0}", refined.Modifications);
        Console.WriteLine("Composition: {0}", sinkComposition);
        Console.WriteLine("RefinedScores: {0}", refined);

        sinkIndex++;
    }
}
/// <summary>
/// Runs <c>TestTopDownSearch</c> on the QC_Shew intact data set with dehydro (C), oxidation (M) and
/// protein N-terminal acetylation (at most 4 modifications per protein), search mode 2, target and decoy.
/// The test is ignored when the spectrum file or the database file is missing.
/// </summary>
public void TestForQcShew()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    // QC_Shew
    const string specFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    const string dbFilePath = @"D:\MSPathFinder\Fasta\ID_002216_235ACCEA.fasta";
    const string outputDir = @"D:\MassSpecFiles\training\test";

    // Skip (rather than fail) when the local input data is unavailable.
    foreach (var requiredFile in new[] { specFilePath, dbFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, requiredFile);
        }
    }

    // Configure amino acid set
    const int numMaxModsPerProtein = 4;
    var modifications = new List<SearchModification>
    {
        new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false)
    };
    var aminoAcids = new AminoAcidSet(modifications, numMaxModsPerProtein);

    const int searchMode = 2;   // 0: all subsequences, 1: close to N- or C-term, 2: close to N- and C-term
    bool? targetDecoy = true;   // true: target & decoy, false: target, null: decoy

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aminoAcids, targetDecoy, searchMode);
}
/// <summary>
/// Post-processes an MS-GF+ result file against all <c>*.raw</c> files of a local directory using
/// <c>MsGfPostProcessor</c> and prints the number of identifications it reports.
/// The test is ignored when the result file or the raw-file directory is missing, matching the
/// skip behavior of the other data-dependent tests in this file.
/// </summary>
public void ProcessMhcData()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultPath = @"D:\Research\Data\ImmunoPeptidomics\Benchmarking\IPA\carone_C1309_All.tsv";
    const string outputFilePath = @"D:\Research\Data\ImmunoPeptidomics\Benchmarking\IPA\IPA_Summary.tsv";
    const string rawFileDir = @"D:\Research\Data\ImmunoPeptidomics\Benchmarking\raw";

    if (!File.Exists(resultPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultPath);
    }

    // Directory.GetFiles throws DirectoryNotFoundException for a missing directory; skip instead.
    if (!Directory.Exists(rawFileDir))
    {
        Assert.Ignore(@"Skipping test {0} since directory not found: {1}", methodName, rawFileDir);
    }

    var specFiles = Directory.GetFiles(rawFileDir, "*.raw");

    // The amino acid set previously built here was never passed to anything, so it has been removed.
    var postProcessor = new MsGfPostProcessor(specFiles, resultPath, new Tolerance(5), new Tolerance(3));
    var numId = postProcessor.PostProcessing(outputFilePath);

    Console.WriteLine("NumId: {0}", numId);
}
/// <summary>
/// Rescores the identifications listed in the IcTarget and IcDecoy result files of the QC_Shew intact
/// data set: for every row it computes the <c>InformedTopDownScorer</c> score and a spectral E-value
/// from a <c>GeneratingFunction</c>, then writes the first 15 columns of the row plus both values to a
/// "_Rescored.tsv" file next to the input.
/// The test is ignored when the spectrum file or either result file is missing.
/// </summary>
public void TestCompositeScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    //const string rawFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SpecFiles\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
    const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    const string resultFileFormat = @"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv";
    const string outputFileFormat = @"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv";

    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var fileExt = new string[] { "IcTarget", "IcDecoy" };

    // Check every result file up front so that a missing one skips the test before the run is
    // loaded and before any output file has been written.
    foreach (var ext in fileExt)
    {
        var requiredResultFile = string.Format(resultFileFormat, ext);
        if (!File.Exists(requiredResultFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredResultFile);
        }
    }

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        oxM,
        acetylN
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
    const double filteringWindowSize = 1.1;
    const int isotopeOffsetTolerance = 2;
    var tolerance = new Tolerance(10);
    const int minCharge = 1;
    const int maxCharge = 20;
    var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);

    // Sequences are rebuilt from the modification strings of the result file, so the scorer uses a
    // default amino acid set rather than the search-modification set above.
    var aminoAcidSet = new AminoAcidSet();
    //var scorer = new MatchedPeakPostScorer(tolerance, minCharge, maxCharge);
    var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

    foreach (var ext in fileExt)
    {
        var resultFileName = string.Format(resultFileFormat, ext);
        var parser = new TsvFileParser(resultFileName);
        var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
        var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
        var protSequences = parser.GetData("Sequence").ToArray();
        var modStrs = parser.GetData("Modifications").ToArray();
        var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
        var protMass = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();
        // Fetch the raw rows once instead of once per parallel iteration.
        var rows = parser.GetRows();

        var outputFileName = string.Format(outputFileFormat, ext);
        using (var writer = new StreamWriter(outputFileName))
        {
            writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

            // Rows are scored in parallel but written afterwards in their original order.
            var lines = new string[parser.NumData];

            Parallel.For(0, parser.NumData, i =>
            {
                var scan = scans[i];
                var charge = charges[i];
                var protSequence = protSequences[i];
                var modStr = modStrs[i];
                var sequence = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);

                // The result file's composition includes one water relative to the sequence composition.
                Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));

                var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;
                Assert.True(ms2Spec != null);

                var scores = scorer.GetScores(sequence, charge, scan);

                var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);
                var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance, comparer);
                var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                var gf = new GeneratingFunction(graph);
                gf.ComputeGeneratingFunction();

                var specEvalue = gf.GetSpectralEValue(scores.Score);

                var items = rows[i].Split('\t');
                var newRowStr = string.Join("\t", items, 0, 15);

                // Each iteration writes only its own slot of the array, so no lock is needed.
                lines[i] = string.Format("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
            });

            foreach (var line in lines)
            {
                writer.WriteLine(line);
            }
        }
        Console.WriteLine("Done");
    }
}
/// <summary>
/// Builds a sequence graph for a short annotation with methylation (K) and oxidation (M) enabled
/// (at most 4 modifications) and prints the number of proteoform sequences, first per modification
/// combination and then per number of modifications.
/// </summary>
public void TestNumberOfProteoforms()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    //const string annotation = "_.AMCMC._";
    const string annotation = "_.MARTKQTARK._";

    // Configure amino acid set.
    // Several modifications are declared but only referenced from the commented-out list entries
    // below, so they can be switched on without further edits.
    var methylK = new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false);
    var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var carbamidomethylC = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);
    var methylC = new SearchModification(Modification.Methylation, 'C', SequenceLocation.Everywhere, true);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.PeptideNTerm, false);

    var enabledMods = new List<SearchModification>
    {
        //carbamidomethylC,
        //methylC,
        methylK,
        //pyroGluQ,
        oxM,
        //acetylN
    };

    const int numMaxModsPerProtein = 4;
    var aminoAcids = new AminoAcidSet(enabledMods, numMaxModsPerProtein);

    var seqGraph = SequenceGraph.CreateGraph(aminoAcids, annotation);
    var compositions = seqGraph.GetSequenceCompositions();
    var modCombinations = seqGraph.GetModificationCombinations();

    Console.WriteLine("\n#Protoeoforms by mod combinations: ");
    for (var combIndex = 0; combIndex < compositions.Length; combIndex++)
    {
        // Index 0 is labeled as the unmodified form instead of printing its combination.
        if (combIndex == 0)
        {
            Console.Write("No modifications");
        }
        else
        {
            Console.Write(modCombinations[combIndex].ToString());
        }
        Console.Write("\t");
        Console.WriteLine("{0}", seqGraph.GetNumProteoformSequences(combIndex));
    }

    Console.WriteLine("\n#Protoeoforms by number of modificaionts: ");
    var modCount = 0;
    while (modCount <= numMaxModsPerProtein)
    {
        Console.Write("#modificaitons = {0}", modCount);
        Console.Write("\t");
        Console.WriteLine("{0}", seqGraph.GetNumProteoformSequencesByNumMods(modCount));
        modCount++;
    }
}
/// <summary>
/// Runs <c>TestBottomUpSearch</c> for the given spectrum file against a local mouse FASTA database with
/// fixed carbamidomethylation (C), no variable modifications (maximum 0), NTT 1, target and decoy.
/// The test is ignored when the spectrum file or the database file is missing.
/// </summary>
/// <param name="specFilePath">Path of the spectrum file to search.</param>
public void TestMaccoss(string specFilePath)
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
    }

    const string dbFilePath = @"D:\Research\Data\UW\QExactive\M_musculus_Uniprot_withContam.fasta";
    const string outputDir = @"D:\Research\Data\UW\QExactive\Ic_NTT1_Mass";

    if (!File.Exists(dbFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, dbFilePath);
    }

    // Configure amino acid set
    const int numMaxModsPerProtein = 0;
    var modifications = new List<SearchModification>
    {
        new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true),
        // Currently disabled:
        //new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false),
        //new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
        //new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false),
        //new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false),
        //new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false),
    };
    var aminoAcids = new AminoAcidSet(modifications, numMaxModsPerProtein);

    const int ntt = 1;          // 0: all subsequences, 1: close to N- or C-term, 2: close to N- and C-term
    bool? targetDecoy = true;   // true: target & decoy, false: target, null: decoy

    TestBottomUpSearch(specFilePath, dbFilePath, outputDir, aminoAcids, targetDecoy, ntt);
}
/// <summary>
/// Builds a sequence graph for histone H4 with acetylation, mono-/di-/tri-methylation and
/// phosphorylation search modifications (at most 11 per protein) and prints the number of fragment
/// compositions and proteoform compositions of the graph.
/// </summary>
public void TestCreatingHistoneGraph()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const int numMaxModsPerProtein = 11;

    // Histone H4
    const string annotation = "_.MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG._";

    // Histone H3.1
    // const string annotation =
    //    "_.MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA._";

    var modifications = new List<SearchModification>
    {
        new SearchModification(Modification.Acetylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Acetylation, 'K', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Methylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.DiMethylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.DiMethylation, 'K', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.TriMethylation, 'R', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false)
    };
    var aminoAcids = new AminoAcidSet(modifications, numMaxModsPerProtein);

    var histoneGraph = SequenceGraph.CreateGraph(aminoAcids, annotation);

    var fragmentCompositionCount = histoneGraph.GetNumFragmentCompositions();
    var proteoformCount = histoneGraph.GetNumProteoformCompositions();
    // NOTE(review): this reads the same value as proteoformCount although it is reported as
    // "NumSequenceCompositions" - looks like a copy-paste slip; confirm which graph property was intended.
    var sequenceCompositionCount = histoneGraph.GetNumProteoformCompositions();

    Console.WriteLine("NumFragmentCompositions: " + fragmentCompositionCount);
    Console.WriteLine("NumProteoforms: " + proteoformCount);
    Console.WriteLine("NumSequenceCompositions: " + sequenceCompositionCount);
}
/// <summary>
/// Runs <c>TestBottomUpSearch</c> on the QC_Shew Q Exactive data set with protein N-terminal
/// acetylation and oxidation (M) configured (maximum modifications per protein is 0), NTT 2,
/// target and decoy.
/// The test is ignored when the spectrum file or the database file is missing.
/// </summary>
public void TestQcShewQExactive()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // QC_Shew QE
    const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
    const string dbFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\ID_003456_9B916A8B.fasta";
    const string outputDir = @"C:\cygwin\home\kims336\Data\QCShewQE\Ic_NTT2_Mass";

    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    // Guard the database file as well; previously the spectrum file was checked twice and a
    // missing database was never detected here.
    if (!File.Exists(dbFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFilePath);
    }

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    //var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.Everywhere, false);
    //var deamdN = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    //var deamdQ = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 0;
    var searchModifications = new List<SearchModification>
    {
        //carbamidomethylC,
        acetylN,
        oxM
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    const int ntt = 2;  // 0: all subsequences, 1: close to N- or C-term, 2: close to N- and C-term
    bool? tda = true;   // true: target & decoy, false: target, null: decoy

    TestBottomUpSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, ntt);
}
/// <summary>
/// Builds a sequence graph for a protein annotation with oxidation (M) and four cysteine modifications
/// (at most 4 per protein) and prints every sequence composition with its modification combination,
/// once before and once after calling <c>CleaveNTerm</c> on the graph.
/// </summary>
public void TestGraphWithModifications()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string annotation = "_.MIALNKTPQTIVFYKPYGVLCQFTDNSAHPRPTLKDYINLPDLYPVGRLDQDSEGLLLLTSNGKLQHRLAHREFAHQRTYFAQVEGSPTDEDLEPLRRGITFADYPTRPAIAKIITEPDFPPRNPPIRYRASIPTSWLSITLTEGRNRQVRRMTAAVGFPTLRLVRVQIQVTGRSPQQGKGKSAATWCLTLEGLSPGQWRPLTPWEENFCQQLLTGNPNGPWQKKFGDRR._";

    const int numMaxModsPerProtein = 4;
    var modifications = new List<SearchModification>
    {
        new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false)
    };
    var aminoAcids = new AminoAcidSet(modifications, numMaxModsPerProtein);

    var seqGraph = SequenceGraph.CreateGraph(aminoAcids, annotation);

    // Prints the current state of the graph; headerFormat receives the composition count as {0}.
    Action<string> printGraphState = headerFormat =>
    {
        var compositions = seqGraph.GetSequenceCompositions();
        var combinations = seqGraph.GetModificationCombinations();

        Console.WriteLine(headerFormat, compositions.Length);
        var index = 0;
        foreach (var composition in compositions)
        {
            Console.WriteLine("SequenceComposition: {0}, ModComb: {1}", composition, combinations[index]);
            index++;
        }
    };

    printGraphState("*** Before cleavage: {0}");

    seqGraph.CleaveNTerm();

    printGraphState("*** After cleavage: {0}");
}
/// <summary>
/// Computes the generating function for one protein sequence against scan 5927 of the QC_Shew intact
/// data set and prints the score range and the spectral E-value for every score from 45 up to the
/// maximum score, together with elapsed times for the individual stages.
/// The test is ignored when the identification file or the spectrum file is missing.
/// </summary>
public void TestGetScoreDistribution()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    const string idFileFolder = @"D:\MassSpecFiles\training\IdScoring\MSPF_trainset";

    const int scanNum = 5927;
    const string protSequence = "MNKSELIEKIASGADISKAAAGRALDSFIAAVTEGLKEGDKISLVGFGTFEVRERAERTGRNPQTGEEIKIAAAKIPAFKAGKALKDAVN";
    const string modStr = "";

    // The id file is only used as a marker that the training data set is present. Report its absence
    // as an ignored test instead of returning silently, which would show up as a pass.
    var idFile = string.Format(@"{0}\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv", idFileFolder);
    if (!File.Exists(idFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, idFile);
    }

    if (!File.Exists(rawFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
    }

    const int maxCharge = 20;
    const int minCharge = 1;
    const double filteringWindowSize = 1.1;
    const int isotopeOffsetTolerance = 2;
    var tolerance = new Tolerance(10);
    var run = PbfLcMsRun.GetLcMsRun(rawFile);

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        oxM,
        acetylN
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);
    //Console.WriteLine("{0}\t{1}", comparer.NumberOfBins, comparer.GetBinNumber(proteinMass));

    // Stage 1: construct the graph factory.
    var stopwatch = Stopwatch.StartNew();
    var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
    stopwatch.Stop();
    Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    // The total timer covers everything from sequence creation to the E-value printout.
    var totalStopwatch = Stopwatch.StartNew();

    var sequence = Sequence.CreateSequence(protSequence, modStr, aaSet);
    var proteinMass = sequence.Mass + Composition.H2O.Mass;
    Console.WriteLine("Mass = {0}", proteinMass);

    // The "as" cast yields null when the scan is missing or is not a product spectrum; fail with a
    // clear message instead of a NullReferenceException further down.
    var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
    Assert.NotNull(spectrum, "Product spectrum not found for the scan");
    var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(spectrum, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

    // Stage 2: build the scoring graph for this spectrum and protein mass.
    stopwatch.Restart();
    var scorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, spectrum, tolerance, comparer);
    var graph = graphFactory.CreateScoringGraph(scorer, proteinMass);
    stopwatch.Stop();
    Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    // Stage 3: compute the generating function.
    stopwatch.Restart();
    var gf = new GeneratingFunction(graph);
    gf.ComputeGeneratingFunction();
    //gf.ComputeGeneratingFunction(graph);
    stopwatch.Stop();
    Console.WriteLine(@"computing generation function = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    var scoreDist = gf.GetScoreDistribution();
    Console.WriteLine("{0}-{1}", scoreDist.MinScore, scoreDist.MaxScore);

    for (var score = 45; score <= gf.MaximumScore; score++)
    {
        var specEvalue = gf.GetSpectralEValue(score);
        Console.WriteLine("{0} : {1}", score, specEvalue);
    }

    totalStopwatch.Stop();
    Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", (totalStopwatch.ElapsedMilliseconds) / 1000.0d);
}