/// <summary>
/// Prints one line per peptide enumerated from the spiked-in peptide FASTA database
/// (using <c>Enzyme.Trypsin</c>): the peptide, its plain-string formula including one water,
/// and the name of the protein it was found in. Finishes by printing the number of peptides.
/// The test is ignored when the hard-coded FASTA file does not exist.
/// </summary>
public void CreateTargetList()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string databaseFilePath = @"D:\Research\Data\IPRG2014\database\SpikedInPeptides.fasta";
    if (!File.Exists(databaseFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, databaseFilePath);
    }

    var fastaDb = new FastaDatabase(databaseFilePath);
    fastaDb.Read();
    var indexedDb = new IndexedDatabase(fastaDb);

    var targetCount = 0;
    var aminoAcids = new AminoAcidSet(Modification.Carbamidomethylation);

    Console.WriteLine("Peptide\tFormula\tProtein");

    foreach (var entry in indexedDb.AnnotationsAndOffsets(6, 30, 1, 1, Enzyme.Trypsin))
    {
        // Drop the first two and last two characters of the annotation to get the bare peptide
        var annotated = entry.Annotation;
        var peptide = annotated.Substring(2, annotated.Length - 4);
        var offset = entry.Offset;

        var formula = (aminoAcids.GetComposition(peptide) + Composition.H2O).ToPlainString();
        var proteinName = fastaDb.GetProteinName(offset);

        Console.WriteLine("{0}\t{1}\t{2}", peptide, formula, proteinName);
        targetCount++;
    }

    Console.WriteLine("NumTargets: {0}", targetCount);
}
/// <summary>
/// Builds a sequence graph for the annotation <c>_.STR._</c> with an amino acid set that carries
/// phosphorylation (S/T/Y), oxidation (M) and carbamidomethylation (C) search modifications,
/// asserts that the graph's unmodified composition equals the amino acid set's composition of
/// the bare sequence, and prints every sequence composition of the graph.
/// </summary>
public void TestSequenceGraph()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    ShowStarting(testName);

    // The last constructor argument is false for the first four entries and true for
    // carbamidomethyl C (presumably "is fixed modification" - confirm against SearchModification)
    var searchModifications = new List<SearchModification>
    {
        new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
        new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true)
    };
    //var searchModifications = new List<SearchModification> { phosPhoT, fixCarbamidomethylC };

    const int numMaxModsPepPeptide = 2;
    var aminoAcids = new AminoAcidSet(searchModifications, numMaxModsPepPeptide);

    const string annotation = "_.STR._";
    // Strip the two flanking characters on each side to get the bare sequence
    var bareSequence = annotation.Substring(2, annotation.Length - 4);

    Console.WriteLine(aminoAcids.GetComposition(bareSequence));

    var graph = SequenceGraph.CreateGraph(aminoAcids, annotation);
    Console.WriteLine(graph.GetUnmodifiedSequenceComposition());
    Assert.AreEqual(graph.GetUnmodifiedSequenceComposition(), aminoAcids.GetComposition(bareSequence));

    Console.WriteLine("Annotation Compositions:");
    var position = 0;
    foreach (var composition in graph.GetSequenceCompositions())
    {
        Console.WriteLine(position + ": " + composition);
        position++;
    }

    //const int seqIndex = 1;
    //Console.WriteLine("Fragment Compositions (" + seqIndex +")");
    //var scoringGraph = graph.GetScoringGraph(seqIndex);
    //foreach (var composition in scoringGraph.GetCompositions())
    //{
    //    Console.WriteLine(composition);
    //}
}
/// <summary>
/// Enumerates all sequences of length 300 to 400 (no enzyme) from the test FASTA file and builds
/// an 11-bin histogram of the difference between the rounded rescaled mass
/// (<c>Mass * Constants.RescalingConstant</c>) and the nominal mass of each sequence.
/// Differences outside the histogram range are counted in the first or last bin.
/// Prints the sequence count, the histogram with relative frequencies, and the elapsed time.
/// </summary>
public void TestNominalMassErrors()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    const int minLength = 300;
    const int maxLength = 400;

    var timer = new System.Diagnostics.Stopwatch();

    var fastaFile = Utils.GetTestFile(testName, Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\ID_003962_71E1A1D4.fasta"));

    var fastaDb = new FastaDatabase(fastaFile.FullName);
    fastaDb.Read();
    var indexedDb = new IndexedDatabase(fastaDb);

    var sequenceCount = 0L;
    timer.Start();

    var histogram = new long[11];
    var aminoAcids = new AminoAcidSet();

    foreach (var entry in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
    {
        ++sequenceCount;

        // Strip the two flanking characters on each side of the annotation
        var annotated = entry.Annotation;
        var composition = aminoAcids.GetComposition(annotated.Substring(2, annotated.Length - 4));

        var mass = composition.Mass;
        var nominalMass = composition.NominalMass;
        var error = (int)Math.Round(mass * Constants.RescalingConstant) - nominalMass;

        // Shift so that an error of zero lands in the middle bin, then clamp into the array
        var bin = Math.Min(Math.Max(error + histogram.Length / 2, 0), histogram.Length - 1);
        histogram[bin]++;
    }

    Console.WriteLine("NumSequences: {0}", sequenceCount);

    var centerBin = histogram.Length / 2;
    for (var bin = 0; bin < histogram.Length; bin++)
    {
        Console.WriteLine("{0}\t{1}\t{2}", bin - centerBin, histogram[bin], histogram[bin] / (double)sequenceCount);
    }

    timer.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Enumerates all sequences of length 300 to 400 (no enzyme) from a hard-coded FASTA file and
/// builds an 11-bin histogram of the difference between the rounded rescaled mass
/// (<c>Mass * Constants.RescalingConstant</c>) and the nominal mass of each sequence.
/// Differences outside the histogram range are counted in the first or last bin.
/// Prints the sequence count, the histogram with relative frequencies, and the elapsed time.
/// </summary>
public void TestNominalMassErrors()
{
    const int minLength = 300;
    const int maxLength = 400;

    var timer = new System.Diagnostics.Stopwatch();

    // const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";
    const string dbFile = @"C:\cygwin\home\kims336\Data\TopDownJia\database\ID_003962_71E1A1D4.fasta";
    //const string dbFile = @"C:\cygwin\home\kims336\Data\TopDownJia\database\TargetProteins.fasta";

    var fastaDb = new FastaDatabase(dbFile);
    fastaDb.Read();
    var indexedDb = new IndexedDatabase(fastaDb);

    var sequenceCount = 0L;
    timer.Start();

    var histogram = new long[11];
    var aminoAcids = new AminoAcidSet();

    foreach (var entry in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
    {
        ++sequenceCount;

        // Strip the two flanking characters on each side of the annotation
        var annotated = entry.Annotation;
        var composition = aminoAcids.GetComposition(annotated.Substring(2, annotated.Length - 4));

        var mass = composition.Mass;
        var nominalMass = composition.NominalMass;
        var error = (int)Math.Round(mass * Constants.RescalingConstant) - nominalMass;

        // Shift so that an error of zero lands in the middle bin, then clamp into the array
        var bin = Math.Min(Math.Max(error + histogram.Length / 2, 0), histogram.Length - 1);
        histogram[bin]++;
    }

    Console.WriteLine("NumSequences: {0}", sequenceCount);

    var centerBin = histogram.Length / 2;
    for (var bin = 0; bin < histogram.Length; bin++)
    {
        Console.WriteLine("{0}\t{1}\t{2}", bin - centerBin, histogram[bin], histogram[bin] / (double)sequenceCount);
    }

    timer.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Reads peptides (one per non-empty line) from a hard-coded text file, computes the
/// monoisotopic m/z of each at charge 2, and tallies those in [400, 900) into four
/// 125-wide bins. Prints one line per counted peptide and then the per-bin counts and ratios.
/// </summary>
public void ComputeSpikedInPeptideMzHist()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string pepListFile = @"C:\cygwin\home\kims336\Data\DIA\SpikedPeptides.txt";

    const int lowestMz = 400;
    const int highestMz = 900;
    const int binWidth = 125;

    var aminoAcids = new AminoAcidSet(Modification.Carbamidomethylation);
    var chargeStates = new[] { 2 };

    var binCounts = new int[4];
    var counted = 0;

    Console.WriteLine("Peptide\tCharge\tMz");

    foreach (var line in File.ReadLines(pepListFile))
    {
        if (line.Length == 0)
        {
            continue;
        }

        var peptide = line;
        var composition = aminoAcids.GetComposition(peptide) + Composition.H2O;

        foreach (var charge in chargeStates)
        {
            var mz = new Ion(composition, charge).GetMonoIsotopicMz();

            // Only m/z values inside [400, 900) are binned
            if (mz < lowestMz || mz >= highestMz)
            {
                continue;
            }

            var bin = (int)((mz - lowestMz) / binWidth);
            binCounts[bin]++;
            Console.WriteLine("{0}\t{1}\t{2}\t{3}", peptide, charge, mz, bin);
            counted++;
        }
    }

    Console.WriteLine("\nRange\tNum\tRatio");
    for (var bin = 0; bin < binCounts.Length; bin++)
    {
        var rangeStart = lowestMz + bin * binWidth;
        var rangeEnd = lowestMz + binWidth + bin * binWidth;
        Console.WriteLine("{0}-{1}\t{2}\t{3}", rangeStart, rangeEnd, binCounts[bin], binCounts[bin] / (float)counted);
    }
}
/// <summary>
/// Groups sequence-tag matches by protein name.
/// </summary>
/// <param name="tags">Sequence tags to look up in the database.</param>
/// <param name="searchableDb">Database searched for each tag's sequence; also supplies the FASTA database.</param>
/// <param name="aaSet">Amino acid set used to compute each tag's mass and passed to every new <see cref="MatchedTagSet"/>.</param>
/// <param name="tolerance">Tolerance passed to every new <see cref="MatchedTagSet"/>.</param>
/// <param name="relaxedTolerance">Relaxed tolerance passed to every new <see cref="MatchedTagSet"/>.</param>
/// <returns>
/// A dictionary from protein name to the set of matched tags found in that protein.
/// Tags with more than <c>MaxNumProteinMatchesPerTag</c> matches contribute nothing.
/// </returns>
public static Dictionary<string, MatchedTagSet> GetProteinToMatchedTagsMap(
    IEnumerable<SequenceTag.SequenceTag> tags,
    SearchableDatabase searchableDb,
    AminoAcidSet aaSet,
    Tolerance tolerance,
    Tolerance relaxedTolerance)
{
    var fasta = searchableDb.FastaDatabase;
    var tagSetsByProtein = new Dictionary<string, MatchedTagSet>();

    foreach (var tag in tags)
    {
        var hits = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();

        // Skip tags that match too many places
        if (hits.Length > MaxNumProteinMatchesPerTag)
        {
            continue;
        }

        foreach (var hit in hits)
        {
            var proteinName = fasta.GetProteinName(hit);
            var positionInProtein = fasta.GetZeroBasedPositionInProtein(hit);
            var tagMass = aaSet.GetComposition(tag.Sequence).Mass;
            var matchedTag = new MatchedTag(tag, positionInProtein) { Mass = tagMass };

            MatchedTagSet knownSet;
            if (tagSetsByProtein.TryGetValue(proteinName, out knownSet))
            {
                knownSet.Add(matchedTag);
                continue;
            }

            // First match for this protein: fall back to the protein name when no sequence is returned
            var proteinSequence = fasta.GetProteinSequence(proteinName) ?? proteinName;

            var newSet = new MatchedTagSet(proteinSequence, aaSet, tolerance, relaxedTolerance);
            newSet.Add(matchedTag);
            tagSetsByProtein.Add(proteinName, newSet);
        }
    }

    return tagSetsByProtein;
}
/// <summary>
/// Prints the m/z and ratio of every isotope returned by <c>GetIsotopes(0.1)</c> for the
/// charge-9 ion of a fixed protein sequence (composition of the residues plus one water).
/// </summary>
public void TestIsoProfile()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    const string sequence = "MWYMISAQDVENSLEKRLAARPAHLARLQELADEGRLLVAGPHPAIDSENPGDAGFSGSLVVADFDSLATAQAWADADPYFAAGVYQSVVVKPFKRVLP";

    var aminoAcids = new AminoAcidSet();
    var composition = aminoAcids.GetComposition(sequence) + Composition.H2O;
    var ion = new Ion(composition, 9);

    foreach (var isotope in ion.GetIsotopes(0.1))
    {
        var mz = ion.GetIsotopeMz(isotope.Index);
        Console.WriteLine(mz + "\t" + isotope.Ratio);
    }
}
/// <summary>
/// Parses the text of a single MS/MS spectrum entry into an <see cref="MSMSSpectrum"/>.
/// </summary>
/// <remarks>
/// The text is processed line by line (split on '\n'; trailing whitespace such as '\r' is dropped):
/// <list type="bullet">
/// <item>A line starting with a digit is a peak: two numbers separated by spaces and/or tabs.</item>
/// <item>A line starting with <c>CHARGE</c> holds the charge after '=', with an optional leading or trailing '+'.</item>
/// <item>A line starting with <c>SEQ</c> holds the annotation after the last '='.</item>
/// <item>A line starting with <c>PEPMASS</c> holds the precursor m/z as the first whitespace-delimited field after '='.</item>
/// </list>
/// All other lines are ignored. Numbers are parsed with the invariant culture.
/// </remarks>
/// <param name="s">Text of one spectrum entry.</param>
/// <returns>
/// The parsed spectrum. Charge and precursor m/z are 0 and the annotation is null
/// when the corresponding line is absent.
/// </returns>
/// <exception cref="FormatException">A numeric field cannot be parsed, or a PEPMASS line has no value.</exception>
private static MSMSSpectrum ReadSpectrum(string s)
{
    var precursorMz = 0.0;
    var charge = 0;
    Sequence annotation = null;
    var peaks = new List<MSMSSpectrumPeak>();

    // Spectrum files are machine-written, so numbers must not depend on the current culture
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var fieldSeparators = new[] { ' ', '\t' };

    foreach (var rawLine in s.Split('\n'))
    {
        // Drop a trailing '\r' (CRLF input) and other trailing whitespace
        var line = rawLine.TrimEnd();
        if (line.Length == 0)
        {
            continue;
        }

        if (char.IsDigit(line[0]))
        {
            // Peak line; tolerate repeated separators between the two fields
            var fields = line.Split(fieldSeparators, StringSplitOptions.RemoveEmptyEntries);
            peaks.Add(new MSMSSpectrumPeak(double.Parse(fields[0], invariant), double.Parse(fields[1], invariant)));
        }
        else if (line.StartsWith("CHARGE", StringComparison.Ordinal))
        {
            var chargeStr = line.Substring(line.IndexOf('=') + 1).Trim();
            if (chargeStr.StartsWith("+", StringComparison.Ordinal))
            {
                chargeStr = chargeStr.Substring(1);
            }

            if (chargeStr.EndsWith("+", StringComparison.Ordinal))
            {
                chargeStr = chargeStr.Substring(0, chargeStr.Length - 1);
            }

            charge = int.Parse(chargeStr, invariant);
        }
        else if (line.StartsWith("SEQ", StringComparison.Ordinal))
        {
            var annotationStr = line.Substring(line.LastIndexOf('=') + 1);
            annotation = new Sequence(annotationStr, AminoAcidSet);
        }
        else if (line.StartsWith("PEPMASS", StringComparison.Ordinal))
        {
            // string.Split does not interpret regular expressions, so split on whitespace characters;
            // the m/z may be followed by further fields (e.g. an intensity), which are ignored
            var fields = line.Substring(line.IndexOf('=') + 1)
                             .Split(fieldSeparators, StringSplitOptions.RemoveEmptyEntries);
            if (fields.Length == 0)
            {
                throw new FormatException("PEPMASS line has no value: " + line);
            }

            precursorMz = double.Parse(fields[0], invariant);
        }
    }

    return new MSMSSpectrum(charge, precursorMz, annotation, peaks);
}
/// <summary>
/// Loads a hard-coded raw file, prints the theoretical isotope profile of a fixed charge-2
/// peptide ion, extracts precursor ion chromatograms for isotope indices -1 to 2 around a
/// target scan, and prints for each index its m/z, peak count, summed intensity relative to
/// the most abundant isotope's chromatogram, and correlation with that chromatogram.
/// </summary>
public void TestXicGen()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

    // Test
    var tolerance = new Tolerance(30);

    const string peptide = "AIANGQVDGFPTQEECR";
    const int targetScanNum = 37633;
    const int charge = 2;

    //const string peptide = "IVDTNGAGDAFAGGFMAGLTK";
    //const int targetScanNum = 67513;
    //const int charge = 3;

    var aminoAcids = new AminoAcidSet(Modification.Carbamidomethylation);
    var precursorIon = new Ion(aminoAcids.GetComposition(peptide) + Composition.H2O, charge);

    Console.WriteLine("Theoretical isotopomer profile:");
    foreach (var isotope in precursorIon.GetIsotopes(0.1))
    {
        Console.WriteLine("{0}\t{1}", precursorIon.GetIsotopeMz(isotope.Index), isotope.Ratio);
    }

    // Isotope indices examined: firstIndex (inclusive) up to endIndex (exclusive)
    const int firstIndex = -1;
    const int endIndex = 3;

    var xicByIsotope = new Dictionary<int, Xic>();
    var basePeakIndex = precursorIon.Composition.GetMostAbundantIsotopeZeroBasedIndex();

    for (var isotopeIndex = firstIndex; isotopeIndex < endIndex; isotopeIndex++)
    {
        var mz = precursorIon.GetIsotopeMz(isotopeIndex);
        xicByIsotope[isotopeIndex] = run.GetPrecursorExtractedIonChromatogram(mz, tolerance, targetScanNum);
    }

    for (var isotopeIndex = firstIndex; isotopeIndex < endIndex; isotopeIndex++)
    {
        Console.WriteLine("\nIndex: {0}", isotopeIndex);
        Console.WriteLine("m/z: {0}", precursorIon.GetIsotopeMz(isotopeIndex));
        Console.WriteLine("#XicPeaks: {0}", xicByIsotope[isotopeIndex].Count);
        Console.WriteLine("Intensity: {0}", xicByIsotope[isotopeIndex].GetSumIntensities() / xicByIsotope[basePeakIndex].GetSumIntensities());
        Console.WriteLine("Correlation: {0}", xicByIsotope[isotopeIndex].GetCorrelation(xicByIsotope[basePeakIndex]));
    }
}
/// <summary>
/// For every row of a hard-coded result TSV file, asserts that the composition of the sequence
/// (plus one water) plus the compositions of all listed modifications equals the value in the
/// row's "Composition" column. The test is ignored when the file does not exist.
/// </summary>
public void ValidateIcResultsWithModifications()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1_Rescored.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, resultFilePath);
    }

    var parser = new TsvFileParser(resultFilePath);
    var sequences = parser.GetData("Sequence");
    var modifications = parser.GetData("Modifications");
    var expectedCompositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();

    // Not used below; the eager conversion is kept so the column is still read and converted
    var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();

    var aminoAcids = new AminoAcidSet();

    for (var row = 0; row < parser.NumData; row++)
    {
        var sequenceComposition = aminoAcids.GetComposition(sequences[row]) + Composition.H2O;

        // Remove the first and last character of the modifications field, then split on commas
        var modificationList = modifications[row].Substring(1, modifications[row].Length - 2);

        var modificationTotal = Composition.Zero;
        foreach (var entry in modificationList.Split(','))
        {
            if (entry.Length == 0)
            {
                continue;
            }

            // The modification name is the first whitespace-delimited token of the entry
            var modification = Modification.Get(entry.Split()[0]);
            modificationTotal += modification.Composition;
        }

        var actualComposition = sequenceComposition + modificationTotal;
        Assert.True(actualComposition.Equals(expectedCompositions[row]));
    }
}
/// <summary>
/// Prints diagnostic information for a fixed peptide sequence: its composition (residues plus
/// one water), mass, nominal mass, the masses of isotopes 0 to 2, the relative intensities of
/// the isotopomer envelope, and the m/z and ratio of each isotope of the charge-13 ion
/// returned by <c>GetIsotopes(0.1)</c>.
/// </summary>
public void TestPeptide()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    ShowStarting(methodName);

    //const string sequence = "MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG";  // Histone H4
    const string sequence = "IRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG";  // Histone H4
    //const string sequence = "MRIILLGAPGAGKGTQAQFIMEKYGIPQISTGDMLRAAVKSGSELGKQAKDIMDAGKLVTDELVIALVKERIAQEDCRNGFLLDGFPRTIPQADAMKEAGIVVDYVLEFDVPDELIVDRIVGRRVHAASGRVYHVKFNPPKVEGKDDVTGEDLTTRKDDQEETVRKRLVEYHQMTAPLIGYYQKEAEAGNTKYAKVDGTQAVADVRAALEKILG";
    //const string sequence = "MNKTQLIDVIAEKAELSKTQAKAALESTLAAITESLKEGDAVQLVGFGTFKVNHRAERTGRNPQTGKEIKIAAANVPAFVSGKALKDAVK";
    //const string sequence =
    //    "METTKPSFQDVLEFVRLFRRKNKLQREIQDVEKKIRDNQKRVLLLDNLSDYIKPGMSVEAIQGIIASMKGDYEDRVDDYIIKNAELSKERRDISKKLKAMGEMKNGEAK";

    var aaSet = new AminoAcidSet();

    // Residue composition plus one water
    var composition = aaSet.GetComposition(sequence) + Composition.H2O;

    Console.WriteLine(composition);
    Console.WriteLine(composition.Mass);
    Console.WriteLine(composition.NominalMass);

    // Masses of isotopes 0, 1 and 2
    Console.WriteLine(composition.GetIsotopeMass(0));
    Console.WriteLine(composition.GetIsotopeMass(1));
    Console.WriteLine(composition.GetIsotopeMass(2));

    //Assert.AreEqual(composition.ToPlainString(), "C34H51N7O14");

    Console.WriteLine("Isotopomer Envelope:");
    foreach (var e in composition.GetIsotopomerEnvelopeRelativeIntensities())
    {
        Console.WriteLine(e);
    }

    Console.WriteLine();

    Console.WriteLine("Isotope ions:");

    // composition already contains the water, so it is used as-is;
    // adding Composition.H2O again would shift every m/z by one extra water
    var ion = new Ion(composition, 13);
    foreach (var p in ion.GetIsotopes(0.1))
    {
        Console.WriteLine("{0}\t{1}", ion.GetIsotopeMz(p.Index), p.Ratio);
    }

    Console.WriteLine();
}
/// <summary>
/// Scores a fixed protein sequence against scan 5448 (charge 11) of a hard-coded PBF file using
/// <see cref="InformedTopDownScorer"/>, then prints the total score and the number of matched fragments.
/// </summary>
public void TestRescoring()
{
    //const string specFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
    const string specFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

    //const string sequence = "SGWYELSKSSNDQFKFVLKAGNGEVILTSELYTGKSGAMNGIESVQTNSPIEARYAKEVAKNDKPYFNLKAANHQIIGTSQMYSSTA";
    //const int scanNum = 4084;

    const string sequence = "SKTKHPLPEQWQKNQEAAKATQVAFDLDEKFQYSIRKAALDAGVSPSDQIRTILGLSVSRRPTRPRLTVSLNADDYVQLAEKYDLNADAQLEIKRRVLEDLVRFVAED";
    const int scanNum = 5448;
    const int charge = 11;

    // Search modifications that make up the amino acid set
    var nTermAcetylation = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var methionineOxidation = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var cysteineDehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysteineGlutathione = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification>
    {
        cysteineDehydro,
        cysteineGlutathione,
        methionineOxidation,
        nTermAcetylation,
    };

    var aminoAcids = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var composition = aminoAcids.GetComposition(sequence) + Composition.H2O;

    var run = PbfLcMsRun.GetLcMsRun(specFilePath, 0, 0);
    var scorer = new InformedTopDownScorer(run, aminoAcids, 1, 15, new Tolerance(10));

    var result = scorer.GetScores(AminoAcid.ProteinNTerm, sequence, AminoAcid.ProteinCTerm, composition, charge, scanNum);

    Console.WriteLine("Total Score = " + result.Score);
    Console.WriteLine("#Fragments = " + result.NumMatchedFrags);
}
/// <summary>
/// Times isotope-profile computation over every line of a hard-coded peptide list: for each
/// annotation it computes the composition, calls <c>GetIsotopePattern</c> on its plain-string
/// formula and <c>GetIsotopomerEnvelopeRelativeIntensities</c> on the composition (results are
/// discarded), then prints the number of peptides and the elapsed time.
/// The test is ignored when the file does not exist.
/// </summary>
public void TestTimeToComputeIsotopomerProfiles()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    ShowStarting(testName);

    var aminoAcids = new AminoAcidSet(Modification.Carbamidomethylation);

    const string dbFilePath = @"C:\cygwin\home\kims336\Data\IMS_Sarc\HumanPeptides.txt";
    if (!File.Exists(dbFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, dbFilePath);
    }

    var peptideCount = 0;
    var timer = new System.Diagnostics.Stopwatch();
    timer.Start();

    var isotopeCalculator = IsotopicDistributionCalculator.Instance;

    foreach (var annotated in File.ReadLines(dbFilePath))
    {
        ++peptideCount;

        // Strip the two flanking characters on each side of the annotation
        var composition = aminoAcids.GetComposition(annotated.Substring(2, annotated.Length - 4));

        // Both calls are made only for timing; their results are not used
        isotopeCalculator.GetIsotopePattern(composition.ToPlainString());
        composition.GetIsotopomerEnvelopeRelativeIntensities();
    }

    Console.WriteLine("NumPeptides: " + peptideCount);

    timer.Stop();
    Console.WriteLine(@"{0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Scores a protein sequence against one scan of the PBF test file using
/// <see cref="InformedTopDownScorer"/> and asserts the resulting score.
/// </summary>
/// <param name="scanNum">Scan number to score against.</param>
/// <param name="charge">Charge passed to the scorer.</param>
/// <param name="sequence">Protein sequence to score.</param>
/// <param name="expectedScore">Expected total score (compared with a tolerance of 0.0001).</param>
public void TestRescoring(int scanNum, int charge, string sequence, double expectedScore)
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    var pbfFilePath = Utils.GetPbfTestFilePath(false);
    var pbfFile = Utils.GetTestFile(testName, pbfFilePath);

    // Search modifications that make up the amino acid set
    var nTermAcetylation = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var methionineOxidation = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var cysteineDehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysteineGlutathione = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification>
    {
        cysteineDehydro,
        cysteineGlutathione,
        methionineOxidation,
        nTermAcetylation,
    };

    var aminoAcids = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var composition = aminoAcids.GetComposition(sequence) + Composition.H2O;

    var run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName, 0, 0);
    var scorer = new InformedTopDownScorer(run, aminoAcids, 1, 15, new Tolerance(10));

    var result = scorer.GetScores(AminoAcid.ProteinNTerm, sequence, AminoAcid.ProteinCTerm, composition, charge, scanNum);

    Console.WriteLine("Total Score = " + result.Score);
    Console.WriteLine("#Fragments = " + result.NumMatchedFrags);

    Assert.AreEqual(expectedScore, result.Score, 0.0001);
}
/// <summary>
/// Measures how well the MS1-based sequence filter retains identified spectra.
/// Steps: load a hard-coded raw file, build an <see cref="Ms1IsotopeAndChargeCorrFilter"/>,
/// count the filter's matching MS2 scans for every mass bin between 3000 and 30000, then walk a
/// hard-coded result TSV file and, for each accepted identification, print correlation scores
/// against the precursor and next scans together with whether the filter keeps the scan.
/// Ends with summary statistics. The test is ignored when either input file does not exist.
/// </summary>
public void FilteringEfficiencyQcShew()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    var timer = new System.Diagnostics.Stopwatch();
    timer.Start();

    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, rawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
    timer.Stop();
    Console.WriteLine(@"Reading run: {0:f4} sec", timer.Elapsed.TotalSeconds);

    const int minPrecursorCharge = 3;
    const int maxPrecursorCharge = 30;
    const int tolerancePpm = 10;
    var tolerance = new Tolerance(tolerancePpm);

    // Time the construction of the MS1 filter
    timer.Reset();
    timer.Start();
    var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40);
    //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);
    timer.Stop();
    Console.WriteLine(@"Ms1 filter: {0:f4} sec", timer.Elapsed.TotalSeconds);

    ISequenceFilter ms1Filter = ms1BasedFilter;

    // Time the per-bin match counting
    timer.Reset();
    timer.Start();

    const double minProteinMass = 3000.0;
    const double maxProteinMass = 30000.0;
    var firstBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
    var lastBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);

    var comparisonCount = 0L;
    for (var bin = firstBin; bin <= lastBin; bin++)
    {
        var binMass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(bin);
        comparisonCount += ms1Filter.GetMatchingMs2ScanNums(binMass).Count();
    }

    timer.Stop();
    Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", timer.Elapsed.TotalSeconds);

    //const string prot =
    //    "ADVFHLGLTKAMLDGATLAIVPGDPERVKRIAELMDNATFLASHREYTSYLAYADGKPVVICSTGIGGPSTSIAVEELAQLGVNTFLRVGTTGAIQPHVNVGDVIVTQASVRLDGASLHFAPMEFPAVANFECTTAMVAACRDAGVEPHIGVTASSDTFYPGQERYDTVTGRVTRRFAGSMKEWQDMGVLNYEMESATLFTMCATQGWRAACVAGVIVNRTQQEIPDEATMKKTEVSAVSIVVAAAKKLLA";
    //var protMass = (new AminoAcidSet().GetComposition(prot) + Composition.H2O).Mass;
    //Console.WriteLine("************ScanNums: " + string.Join("\t", ms1Filter.GetMatchingMs2ScanNums(protMass)));

    const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, resultFilePath);
    }

    var tsvReader = new TsvFileParser(resultFilePath);
    var scanNums = tsvReader.GetData("Scan(s)");
    var charges = tsvReader.GetData("Charge");
    var scores = tsvReader.GetData("E-value");
    var sequences = tsvReader.GetData("Peptide");

    //const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_N30_C30.tsv";
    //var tsvReader = new TsvFileParser(resultFilePath);
    //var scanNums = tsvReader.GetData("ScanNum");
    //var charges = tsvReader.GetData("Charge");
    //var scores = tsvReader.GetData("Score");
    //var sequences = tsvReader.GetData("Sequence");

    var aminoAcids = new AminoAcidSet();

    var retainedSequences = new HashSet<string>();
    var allSequences = new HashSet<string>();

    var retainedSpecCount = 0;
    var validSpecCount = 0;

    for (var row = 0; row < scores.Count; row++)
    {
        // Only identifications with an E-value of at most 1E-4 are considered
        var score = Convert.ToDouble(scores[row]);
        if (score > 1E-4)
        {
            continue;
        }
        //if (score < 10) continue;

        var scanNum = Convert.ToInt32(scanNums[row]);
        var charge = Convert.ToInt32(charges[row]);

        // Skip rows with no extractable sequence or with a '(' in the sequence
        var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[row]);
        if (sequence == null || sequence.Contains("("))
        {
            continue;
        }
        //var sequence = sequences[i];

        var composition = aminoAcids.GetComposition(sequence) + Composition.H2O;
        var precursorIon = new Ion(composition, charge);

        // The scan must be a product spectrum whose isolation window holds the most abundant isotope
        var isValid = run.GetSpectrum(scanNum) is ProductSpectrum spec &&
                      spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
        if (!isValid)
        {
            continue;
        }

        ++validSpecCount;

        var precursorScanNum = run.GetPrecursorScanNum(scanNum);
        var precursorSpec = run.GetSpectrum(precursorScanNum);
        var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        var nextScanNum = run.GetNextScanNum(scanNum, 1);
        var nextSpec = run.GetSpectrum(nextScanNum);
        var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // 1 when the MS1 filter lists this scan for the sequence mass, otherwise 0
        var keptByFilter = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum);
        var corr3 = keptByFilter ? 1 : 0;
        if (keptByFilter)
        {
            retainedSpecCount++;
            retainedSequences.Add(sequences[row]);
        }

        allSequences.Add(sequences[row]);

        var corrMax = new[] { corr1, corr2, corr3 }.Max();

        Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax);
    }

    Console.WriteLine("TotalNumComparisons: {0}", comparisonCount);
    Console.WriteLine("AverageNumComparisons: {0:f2}", comparisonCount / (double)(lastBin - firstBin + 1));
    Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", retainedSpecCount / (double)validSpecCount, retainedSpecCount, validSpecCount);
    Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", retainedSequences.Count / (double)allSequences.Count, retainedSequences.Count, allSequences.Count);

    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Validates identifications from a hard-coded result file against precursor chromatograms in a
/// hard-coded raw file. An identification (q-value at most 0.01) counts as valid when every
/// isotope returned by <c>GetIsotopes(0.7)</c>, other than the most abundant one, has a
/// non-empty extracted ion chromatogram. Prints target/decoy counts, the valid fractions, and
/// the elapsed time. Proteins whose name starts with "XXX_" are treated as decoys.
/// </summary>
public void TestFusionDdaData()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    // Parameters
    const double relativeIntensityThreshold = 0.7;
    const double precursorTolerancePpm = 20;
    //const double isotopeRatioTolerance = 2;
    //const double correlationThreshold = 0.3;
    const double fdrThreshold = 0.01;

    const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

    var timer = new System.Diagnostics.Stopwatch();
    timer.Start();

    var tolerance = new Tolerance(precursorTolerancePpm);
    var aminoAcids = new AminoAcidSet(Modification.Carbamidomethylation);

    const string resultFilePath = @"D:\Research\Data\UW\Fusion\oldResult\WT_D_DDA_130412065618_10ppm_TI2_SGD_Decoy.tsv";

    var targetCount = 0;
    var validTargetCount = 0;
    var decoyCount = 0;
    var validDecoyCount = 0;

    foreach (var line in File.ReadLines(resultFilePath))
    {
        // Skip comment lines and lines without exactly 16 tab-separated columns
        if (line.StartsWith("#"))
        {
            continue;
        }

        var columns = line.Split('\t');
        if (columns.Length != 16)
        {
            continue;
        }

        var qValue = Convert.ToDouble(columns[14]);
        if (qValue > fdrThreshold)
        {
            continue;
        }

        var peptide = columns[8].Replace("C+57.021", "C");
        var scanNum = Convert.ToInt32(columns[2]);
        var charge = Convert.ToInt32(columns[7]);
        var protein = columns[9];

        var isDecoy = protein.StartsWith("XXX_");
        if (isDecoy)
        {
            decoyCount++;
        }
        else
        {
            targetCount++;
        }

        var precursorIon = new Ion(aminoAcids.GetComposition(peptide) + Composition.H2O, charge);
        var basePeakIndex = precursorIon.Composition.GetMostAbundantIsotopeZeroBasedIndex();
        var baseXic = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance, scanNum);

        // Only referenced by the disabled checks below
        var baseIntensity = baseXic.GetSumIntensities();

        var isValid = true;
        foreach (var isotope in precursorIon.GetIsotopes(relativeIntensityThreshold))
        {
            if (isotope.Index == basePeakIndex)
            {
                continue;
            }

            var isotopeMz = precursorIon.GetIsotopeMz(isotope.Index);
            var xic = run.GetPrecursorExtractedIonChromatogram(isotopeMz, tolerance, scanNum);

            // A single isotope without any chromatogram peak invalidates the identification
            if (xic.Count == 0)
            {
                isValid = false;
                break;
            }

            //if (xic.Count > 0)
            //{
            //    var isotopeRatio = xic.GetSumIntensities() / baseIntensity / isotope.Item2;
            //    var correlation = xic.GetCorrelation(baseXic);
            //    if (isotopeRatio > 0.8 && isotopeRatio < 1.2
            //        && correlation > 0.8)
            //    {
            //        isValid = true;
            //    }
            //}

            // Check if isotope ratio is within tolerance
            //if (isotopeRatio > isotopeRatioTolerance || isotopeRatio < 1 / isotopeRatioTolerance)
            //{
            //    isValid = false;
            //    //Console.WriteLine("Off ratio\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", isDecoy, peptide, scanNum, charge, precursorIon.GetMonoIsotopicMz(), isotopeMz, isotopeRatio);
            //    break;
            //}

            // Check if correlation is high
            //if (correlation < correlationThreshold)
            //{
            //    isValid = false;
            //    //Console.WriteLine("Low correlation\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", isDecoy, peptide, scanNum, charge, precursorIon.GetMonoIsotopicMz(), isotopeMz, correlation);
            //    break;
            //}
        }

        if (isValid && !isDecoy)
        {
            validTargetCount++;
        }
        else if (isValid)
        {
            validDecoyCount++;
        }

        //Console.WriteLine("{0}\t{1}\t{2}", peptide, scanNum, charge);
    }

    Console.WriteLine("#Targets: {0}", targetCount);
    Console.WriteLine("#ValidTargets: {0}\t{1}", validTargetCount, validTargetCount / (double)targetCount);
    Console.WriteLine("#Decoys: {0}", decoyCount);
    Console.WriteLine("#ValidDecoys: {0}\t{1}", validDecoyCount, validDecoyCount / (double)decoyCount);

    timer.Stop();
    Console.WriteLine(@"TimeForPrecursorValidation {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Writes a tab-separated table describing each identification (q-value at most 0.01) in a
/// hard-coded result file: decoy flag, peptide, scan, charge, spectral E-value, q-value,
/// monoisotopic precursor m/z, up to four theoretical isotope ratios, and for the isotope
/// indices 0, 1, 2, 3, -1 and 0.5 the chromatogram intensity relative to the most abundant
/// isotope's chromatogram and the correlation with it.
/// Proteins whose name starts with "XXX_" are flagged as decoys.
/// </summary>
public void AnalyizeFusionDdaData()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    // Parameters
    //const double relativeIntensityThreshold = 0.7;
    const double precursorTolerancePpm = 20;

    const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

    const double fdrThreshold = 0.01;

    var tolerance = new Tolerance(precursorTolerancePpm);
    var aminoAcids = new AminoAcidSet(Modification.Carbamidomethylation);

    const string resultFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618_10ppm_TI2_SGD_Decoy.tsv";

    Console.WriteLine("IsDecoy\tPeptide\tScanNum\tCharge\tSpecEValue\tQValue\tPrecursorMz" +
                      "\tTheo0\tTheo1\tTheo2\tTheo3" +
                      "\tObs0\tCorr0\tObs1\tCorr1\tObs2\tCorr2\tObs3\tCorr3\tObs-1\tCorr-1\tObs0.5\tCorr0.5");

    // Maximum number of theoretical isotope ratios written per row
    const int maxTheoreticalColumns = 4;

    foreach (var line in File.ReadLines(resultFilePath))
    {
        // Skip comment lines and lines without exactly 16 tab-separated columns
        if (line.StartsWith("#"))
        {
            continue;
        }

        var columns = line.Split('\t');
        if (columns.Length != 16)
        {
            continue;
        }

        var qValue = Convert.ToDouble(columns[14]);
        if (qValue > fdrThreshold)
        {
            continue;
        }

        var peptide = columns[8].Replace("C+57.021", "C");
        var scanNum = Convert.ToInt32(columns[2]);
        var charge = Convert.ToInt32(columns[7]);
        var specEValue = Convert.ToDouble(columns[12]);

        var protein = columns[9];
        var isDecoy = protein.StartsWith("XXX_");

        var precursorIon = new Ion(aminoAcids.GetComposition(peptide) + Composition.H2O, charge);
        var baseXic = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance, scanNum);
        var baseIntensity = baseXic.GetSumIntensities();

        Console.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}",
            (isDecoy ? 1 : 0), peptide, scanNum, charge, specEValue, qValue, precursorIon.GetMonoIsotopicMz());

        var isotopeIndices = new double[] { 0, 1, 2, 3, -1, 0.5 };

        var theoIsotopes = precursorIon.GetIsotopes(0.01);
        var written = 0;
        foreach (var theoIsotope in theoIsotopes)
        {
            Console.Write("\t" + theoIsotope.Ratio);
            written++;
            if (written == maxTheoreticalColumns)
            {
                break;
            }
        }

        foreach (var isotopeIndex in isotopeIndices)
        {
            var isotopeMz = precursorIon.GetIsotopeMz(isotopeIndex);
            var xic = run.GetPrecursorExtractedIonChromatogram(isotopeMz, tolerance, scanNum);

            var relativeIntensity = xic.GetSumIntensities() / baseIntensity;
            var correlation = xic.GetCorrelation(baseXic);
            Console.Write("\t{0}\t{1}", relativeIntensity, correlation);
        }

        Console.WriteLine();
    }
}