/// <summary>
/// Updates the target or decoy training counters from a set of identifications.
/// </summary>
/// <param name="dictionary">
/// Outer key is used as the charge and inner key as the scan number passed to
/// <c>TopDownScorer</c>; the value is the composition the scorer is built from.
/// </param>
/// <param name="isTarget">
/// <c>true</c> to update the target counters, <c>false</c> to update the decoy counters.
/// </param>
private void SubTrain(Dictionary<int, Dictionary<int, Composition>> dictionary, bool isTarget)
{
    Dictionary<int, int> missingXicCounter = isTarget
        ? _missingXicCounterTarget
        : _missingXicCounterDecoy;
    Dictionary<int, Dictionary<int, int>> corrScoreCounter = isTarget
        ? _xicCorrScoreCounterTarget
        : _xicCorrScoreCounterDecoy;

    foreach (var chargeEntry in dictionary)
    {
        var charge = chargeEntry.Key;

        foreach (var scanEntry in chargeEntry.Value)
        {
            var scanNumber = scanEntry.Key;
            var scorer = new TopDownScorer(scanEntry.Value, _run, _tolerance, null);
            var areXicMissing = scorer.AreXicMissing(charge, scanNumber);
            var corrScores = scorer.GetIsotopeCorrelationIntensityRawScores(charge, scanNumber);

            // Both result arrays are indexed by (c - MinCharge).
            for (var c = TopDownScorer.MinCharge; c <= TopDownScorer.MaxCharge; c++)
            {
                var index = c - TopDownScorer.MinCharge;

                if (areXicMissing[index])
                {
                    // NOTE(review): the missing-XIC counter is keyed by the outer `charge`,
                    // not by the loop charge `c`; kept as in the original - confirm intent.
                    // TryGetValue leaves the count at 0 for a charge seen for the first
                    // time; the plain indexer would throw KeyNotFoundException instead.
                    int missingCount;
                    missingXicCounter.TryGetValue(charge, out missingCount);
                    missingXicCounter[charge] = missingCount + 1;
                }
                else
                {
                    // The Dictionary indexer never returns null for an absent key (it
                    // throws), so the histogram must be looked up with TryGetValue and
                    // stored back when it is created on demand.
                    Dictionary<int, int> histogram;
                    if (!corrScoreCounter.TryGetValue(c, out histogram) || histogram == null)
                    {
                        histogram = new Dictionary<int, int>();
                        corrScoreCounter[c] = histogram;
                    }

                    var score = corrScores[index];
                    int scoreCount;
                    histogram.TryGetValue(score, out scoreCount);
                    histogram[score] = scoreCount + 1;
                }
            }
        }
    }
}
/// <summary>
/// Accumulates missing-XIC counts and correlation-score histograms into either the
/// target or the decoy counters, depending on <paramref name="isTarget"/>.
/// </summary>
/// <param name="dictionary">
/// Maps a charge to a map from scan number to composition; each (charge, scan number,
/// composition) triple is scored with a new <c>TopDownScorer</c>.
/// </param>
/// <param name="isTarget">Selects the target counters when <c>true</c>, the decoy counters otherwise.</param>
private void SubTrain(Dictionary<int, Dictionary<int, Composition>> dictionary, bool isTarget)
{
    Dictionary<int, int> missingXicCounter;
    Dictionary<int, Dictionary<int, int>> corrScoreCounter;
    if (isTarget)
    {
        missingXicCounter = _missingXicCounterTarget;
        corrScoreCounter = _xicCorrScoreCounterTarget;
    }
    else
    {
        missingXicCounter = _missingXicCounterDecoy;
        corrScoreCounter = _xicCorrScoreCounterDecoy;
    }

    foreach (var charge in dictionary.Keys)
    {
        var subDictionary = dictionary[charge];
        foreach (var scanNumber in subDictionary.Keys)
        {
            var composition = subDictionary[scanNumber];
            var scorer = new TopDownScorer(composition, _run, _tolerance, null);
            var areXicMissing = scorer.AreXicMissing(charge, scanNumber);
            var corrScores = scorer.GetIsotopeCorrelationIntensityRawScores(charge, scanNumber);

            for (var c = TopDownScorer.MinCharge; c <= TopDownScorer.MaxCharge; c++)
            {
                // Offset of charge c inside the per-charge result arrays.
                var offset = c - TopDownScorer.MinCharge;

                if (areXicMissing[offset])
                {
                    // First occurrence of a charge starts from 0 instead of throwing
                    // KeyNotFoundException from the indexer.
                    // NOTE(review): keyed by the outer `charge` rather than `c`, as in
                    // the original - confirm that this is intended.
                    int previous;
                    missingXicCounter.TryGetValue(charge, out previous);
                    missingXicCounter[charge] = previous + 1;
                    continue;
                }

                // `corrScoreCounter[c] ?? new ...` could never take its fallback: the
                // indexer throws for an absent key, and the fallback was never stored.
                Dictionary<int, int> scoreHistogram;
                if (!corrScoreCounter.TryGetValue(c, out scoreHistogram) || scoreHistogram == null)
                {
                    scoreHistogram = new Dictionary<int, int>();
                    corrScoreCounter[c] = scoreHistogram;
                }

                var score = corrScores[offset];
                int occurrences;
                scoreHistogram.TryGetValue(score, out occurrences);
                scoreHistogram[score] = occurrences + 1;
            }
        }
    }
}
/// <summary>
/// Scores every sequence composition produced from the proteins of the E. coli FASTA
/// database against a raw file and prints each score, followed by summary counts
/// and the elapsed time.
/// </summary>
/// <remarks>
/// Depends on a raw file at a hard-coded local path and a FASTA file at a path
/// relative to the working directory.
/// </remarks>
public void TestTopDownScoringForAllXics()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Search parameters (values noted in the original as alternatives: 30 N-term
    // cleavages, 6 modifications per protein, charge range 3-67).
    const int numNTermCleavages = 1;
    const int minLength = 7;
    const int maxLength = 1000;
    const int numMaxModsPerProtein = 0;
    var precursorTolerance = new Tolerance(10);

    const string dbFilePath = @"..\..\..\TestFiles\sprot.Ecoli.2012_07.fasta";
    const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";

    // Time the loading of the raw file.
    var loadTimer = System.Diagnostics.Stopwatch.StartNew();
    Console.Write("Reading raw file...");
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);
    loadTimer.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", loadTimer.Elapsed.TotalSeconds);

    // Configure the amino acid set.
    // NOTE(review): the first entry was named "dehydro" in the original but is built
    // from Modification.PyroGluQ - confirm which modification is intended.
    var searchModifications = new List<SearchModification>
    {
        new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false)
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    var targetDb = new FastaDatabase(dbFilePath);
    var indexedDb = new IndexedDatabase(targetDb);

    var proteinCount = 0;
    long compositionCount = 0;

    TopDownScorer.MaxCharge = 25;
    TopDownScorer.MinCharge = 8;

    // Time the scoring pass.
    var scoringTimer = System.Diagnostics.Stopwatch.StartNew();
    Console.WriteLine("Generating XICs...");

    foreach (var annotationAndOffset in indexedDb.IntactSequenceAnnotationsAndOffsets(minLength, maxLength))
    {
        proteinCount++;
        if (proteinCount % 1000 == 0)
        {
            Console.WriteLine("Processed {0} proteins", proteinCount);
        }

        var graph = SequenceGraph.CreateGraph(aaSet, annotationAndOffset.Annotation);
        if (graph == null)
        {
            continue;
        }

        for (var cleavage = 0; cleavage <= numNTermCleavages; cleavage++)
        {
            // The first pass scores the intact sequence; each later pass cleaves once more.
            if (cleavage > 0)
            {
                graph.CleaveNTerm();
            }

            foreach (var composition in graph.GetSequenceCompositions())
            {
                compositionCount++;
                var scorer = new TopDownScorer(composition, run, precursorTolerance, null);
                Console.WriteLine(scorer.GetScore());
            }
        }
    }

    scoringTimer.Stop();
    Console.WriteLine("NumProteins: {0}", proteinCount);
    Console.WriteLine("NumProteinCompositions: {0}", compositionCount);
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", scoringTimer.Elapsed.TotalSeconds);
}
/// <summary>
/// Scores the compositions of one hard-coded protein annotation against a raw file
/// and prints, for each composition, its most abundant isotope index, the score and
/// a table of m/z values for charges 9 through 18.
/// </summary>
/// <remarks>Depends on a raw file at a hard-coded local path.</remarks>
public void TestTopDownScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    TopDownScorer.MaxCharge = 25;
    TopDownScorer.MinCharge = 8;

    const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";
    const string protAnnotation = "A.AHAHLTHQYPAANAQVTAAPQAITLNFSEGVETGFSGAKITGPKNENIKTLPAKRNEQDQKQLIVPLADSLKPGTYTVDWHVVSVDGHKTKGHYTFSVK.";

    // NOTE(review): the first entry was named "dehydro" in the original but is built
    // from Modification.PyroGluQ - confirm which modification is intended.
    var searchModifications = new List<SearchModification>
    {
        new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false)
    };
    var aaSet = new AminoAcidSet(searchModifications, 0);
    var precursorTolerance = new Tolerance(10);

    // Build the sequence graph before loading the run, as the original does.
    var graph = SequenceGraph.CreateGraph(aaSet, protAnnotation);
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

    const int firstTableCharge = 9;
    const int lastTableCharge = 18;

    foreach (var composition in graph.GetSequenceCompositions())
    {
        var isotopeIndex = composition.GetMostAbundantIsotopeZeroBasedIndex();

        Console.WriteLine("Composition\t{0}", composition);
        Console.WriteLine("MostAbundantIsotopeIndex\t{0}", isotopeIndex);
        Console.WriteLine(new Ion(composition + Composition.H2O, 11).GetIsotopeMz(isotopeIndex));
        Console.WriteLine();

        var scorer = new TopDownScorer(composition, run, precursorTolerance, null);
        Console.WriteLine(scorer.GetScore());

        // m/z of the most abundant isotope for a fixed range of precursor charges.
        Console.WriteLine("\nCharge\tm/z");
        var z = firstTableCharge;
        while (z <= lastTableCharge)
        {
            var ion = new Ion(composition + Composition.H2O, z);
            Console.WriteLine("{0}\t{1}", z, ion.GetIsotopeMz(isotopeIndex));
            z++;
        }
    }
}
/// <summary>
/// Reads an MSAlign+ result table line by line, scores the first sequence composition
/// of each row's annotation against a raw file, and writes every row with an appended
/// score column to "&lt;input path&gt;.txt".
/// </summary>
/// <remarks>
/// Depends on a raw file and a result table at hard-coded local paths. Rows whose
/// annotation cannot be turned into a sequence graph, or that have too few
/// tab-separated fields to contain an annotation, get "N/A" as their score.
/// </remarks>
public void TestMsAlignPlusResults()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    TopDownScorer.MaxCharge = 25;
    TopDownScorer.MinCharge = 8;

    const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";
    const string msAlignPlusResultPath = @"C:\workspace\TopDown\E_coli_iscU_60_mock_MSAlign_ResultTable_sam.txt";

    // Zero-based index of the tab-separated field that is read as the annotation.
    const int annotationColumn = 13;

    // NOTE(review): this variable is named "dehydro" but is built from
    // Modification.PyroGluQ - confirm which modification is intended.
    var dehydro = new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false);
    var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        dehydro,
        cysteinylC,
        glutathioneC
    };
    var aaSet = new AminoAcidSet(searchModifications, 0);
    var precursorTolerance = new Tolerance(10);

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

    // Open the input first so that a missing input does not leave an empty output
    // file behind; the using blocks close both streams even if a row throws.
    using (var reader = new StreamReader(msAlignPlusResultPath))
    using (var writer = new StreamWriter(msAlignPlusResultPath + ".txt"))
    {
        string s;
        while ((s = reader.ReadLine()) != null)
        {
            // Header row: pass it through with the name of the new column.
            if (s.StartsWith("Data_file_name ", StringComparison.Ordinal))
            {
                writer.WriteLine(s + "\tScore");
                continue;
            }

            var token = s.Split('\t');
            if (token.Length <= annotationColumn)
            {
                // Blank or truncated row: there is no annotation field to score.
                writer.WriteLine(s + "\tN/A");
                continue;
            }

            var annotation = token[annotationColumn];
            var seqGraph = SequenceGraph.CreateGraph(aaSet, annotation);
            if (seqGraph == null)
            {
                writer.WriteLine(s + "\tN/A");
                continue;
            }

            var protCompositions = seqGraph.GetSequenceCompositions();
            var scorer = new TopDownScorer(protCompositions[0], run, precursorTolerance, null);
            var score = scorer.GetScore();

            writer.WriteLine(s + "\t" + score);
            Console.WriteLine(score);
        }
    }
}