/// <summary>
/// Reads the "Peptide" and "Formula" columns of an MS-GF+ result file and checks, row by row,
/// that the composition of the parsed peptide plus one water equals the parsed formula.
/// </summary>
/// <remarks>The test is ignored when the result file cannot be found.</remarks>
public void TestReadingTmtResultFile()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string filePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSGFPlusResultTMT10.tsv";
    if (!File.Exists(filePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, filePath);
    }

    var tsv = new TsvFileParser(filePath);
    var peptideColumn = tsv.GetData("Peptide");
    var formulaColumn = tsv.GetData("Formula");
    Assert.True(peptideColumn.Count == formulaColumn.Count);

    var parsedPeptides = peptideColumn.Select(Sequence.GetSequenceFromMsGfPlusPeptideStr).ToList();
    var parsedFormulae = formulaColumn.Select(Composition.Parse).ToList();
    Assert.True(parsedPeptides.Count == parsedFormulae.Count);

    // Walk both lists in lock-step; the counts were asserted equal above.
    var row = 0;
    foreach (var peptide in parsedPeptides)
    {
        var expectedFormula = parsedFormulae[row++];
        Assert.True((peptide.Composition + Composition.H2O).Equals(expectedFormula));
    }
}
/// <summary>
/// Re-scores every row of a tab-separated result file and writes the rows to a new file,
/// with the content of the "Modifications" column replaced by the modifications reported
/// by the scorer and the new scores appended at the end of each row.
/// </summary>
/// <param name="icResultFilePath">Input file; must provide the Sequence, ScanNum, Charge and Composition columns.</param>
/// <param name="outputFilePath">Path of the tab-separated file to write.</param>
private void Rescore(string icResultFilePath, string outputFilePath)
{
    var parser = new TsvFileParser(icResultFilePath);
    var sequences = parser.GetData("Sequence");
    var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
    var charges = parser.GetData("Charge").Select(c => Convert.ToInt32(c)).ToArray();
    var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();

    var rows = parser.GetRows();
    var headers = parser.GetHeaders();

    // Column whose cell is rewritten with the scorer's modification string.
    var modIndex = headers.IndexOf("Modifications");

    using (var writer = new StreamWriter(outputFilePath))
    {
        writer.WriteLine("{0}\t{1}", string.Join("\t", headers), IcScores.GetScoreNames());

        for (var i = 0; i < parser.NumData; i++)
        {
            var scores = _topDownScorer.GetScores(AminoAcid.ProteinNTerm, sequences[i], AminoAcid.ProteinCTerm, compositions[i], charges[i], scanNums[i]);

            // NOTE(review): GetScores is treated as nullable by other callers of the same scorer;
            // skip the row instead of failing on scores.Modifications below. Confirm skipping is the desired policy.
            if (scores == null)
            {
                continue;
            }

            var token = rows[i].Split('\t');
            for (var j = 0; j < token.Length; j++)
            {
                var cell = j != modIndex ? token[j] : "[" + scores.Modifications + "]";
                writer.Write(cell + "\t");
            }

            writer.WriteLine(scores);
        }
    }
}
/// <summary>
/// Loads features from a comma-separated isos file, registers every feature that passes the
/// fit-score and charge filters with <c>_lcMsMatchMap</c>, and then builds the
/// sequence-mass to MS2-scan map for the observed mass range.
/// </summary>
/// <param name="isosFileName">
/// File with the "monoisotopic_mw", "scan_num" and "charge" columns; the "fit" column is optional.
/// </param>
private void Read(string isosFileName)
{
    var icrToolsparser = new TsvFileParser(isosFileName, ',');
    var monoMassArr = icrToolsparser.GetData("monoisotopic_mw").Select(Convert.ToDouble).ToArray();
    var scanArray = icrToolsparser.GetData("scan_num").Select(s => Convert.ToInt32(s)).ToArray();
    var chargeArray = icrToolsparser.GetData("charge").Select(s => Convert.ToInt32(s)).ToArray();

    // The fit column may be absent; in that case no fit-score filtering is applied.
    var fitStringArr = icrToolsparser.GetData("fit");
    var fitArray = fitStringArr == null ? null : fitStringArr.Select(Convert.ToDouble).ToArray();

    var featureCountFiltered = 0;
    var minMass = double.MaxValue;
    var maxMass = 0.0;

    for (var i = 0; i < monoMassArr.Length; i++)
    {
        // Skip features whose fit score exceeds the threshold, and features with charge 1 or lower.
        var fitTooHigh = fitArray != null && fitArray[i] > _fitScoreThreshold;
        if (fitTooHigh || chargeArray[i] <= 1)
        {
            continue;
        }

        featureCountFiltered++;

        var scan = scanArray[i];
        var monoMass = monoMassArr[i];
        if (minMass > monoMass)
        {
            minMass = monoMass;
        }
        if (maxMass < monoMass)
        {
            maxMass = monoMass;
        }

        // Match window: one step before and one step after the feature's scan.
        var minScan = _run.GetPrevScanNum(scan, 1);
        var maxScan = _run.GetNextScanNum(scan, 1);
        _lcMsMatchMap.SetMatches(monoMass, minScan, maxScan);
    }

    Console.Write(@"{0}/{1} features loaded...", featureCountFiltered, monoMassArr.Length);
    _lcMsMatchMap.CreateSequenceMassToMs2ScansMap(_run, _massTolerance, minMass, maxMass);
}
/// <summary>
/// Reads the bottom-up identification file and returns one <see cref="SpectrumMatch"/> for each
/// distinct peptide whose PEP Q-value does not exceed <c>PepQValueThreshold</c>.
/// </summary>
/// <returns>
/// The accepted matches in file order. When a peptide occurs more than once, only its first
/// accepted row is used.
/// </returns>
/// <exception cref="FormatException">
/// The scan, peptide, precursor-charge or PEP Q-value column is missing from the file.
/// </exception>
public IList<SpectrumMatch> Read()
{
    var specMatches = new List<SpectrumMatch>();
    var tsvFile = new TsvFileParser(_fileName);

    var precursorCharges = tsvFile.GetData(PrecursorChargeHeader);
    var scans = tsvFile.GetData(ScanHeader);
    var peptides = tsvFile.GetData(BottomUpPeptideHeader);
    var pepQValues = tsvFile.GetData(PepQValueHeader);
    var formulas = tsvFile.GetData(FormulaHeader); // optional column

    // Every column indexed in the loop below is required; report a missing one as a format problem
    // rather than letting it surface later as a NullReferenceException.
    if (scans == null || peptides == null || precursorCharges == null || pepQValues == null)
    {
        throw new FormatException("Required column missing in " + _fileName);
    }

    var peptideSet = new HashSet<string>();
    for (var i = 0; i < peptides.Count; i++)
    {
        // HashSet.Add returns false for a peptide that was already accepted.
        // It is only reached (and the peptide only recorded) when the Q-value passes.
        if (Convert.ToDouble(pepQValues[i]) > PepQValueThreshold || !peptideSet.Add(peptides[i]))
        {
            continue;
        }

        var scanNum = Convert.ToInt32(scans[i]);
        var precursorCharge = Convert.ToInt32(precursorCharges[i]);

        var hasFormula = formulas != null && formulas[i] != null;
        specMatches.Add(hasFormula
            ? new SpectrumMatch(peptides[i], DataFileFormat.IcBottomUp, _lcms, scanNum, precursorCharge, _decoy, formulas[i])
            : new SpectrumMatch(peptides[i], DataFileFormat.IcBottomUp, _lcms, scanNum, precursorCharge, _decoy));
    }

    return specMatches;
}
/// <summary>
/// Reads the top-down identification file and returns one <see cref="SpectrumMatch"/> for each
/// distinct peptide whose Q-value does not exceed <c>QValueThreshold</c>.
/// </summary>
/// <returns>
/// The accepted matches in file order; an empty list when the peptide column is absent.
/// </returns>
public IList<SpectrumMatch> Read()
{
    var specMatches = new List<SpectrumMatch>();
    var tsvFile = new TsvFileParser(_fileName);
    var chargeColumn = tsvFile.GetData(PrecursorChargeHeader);
    var scanColumn = tsvFile.GetData(ScanHeader);
    var peptideColumn = tsvFile.GetData(TopDownPeptideHeader);

    // Without a peptide column there is nothing to read.
    if (peptideColumn == null)
    {
        return specMatches;
    }

    var acceptedPeptides = new HashSet<string>();
    var qValueColumn = tsvFile.GetData(QValueHeader);
    var aminoAcids = new AminoAcidSet();

    for (var row = 0; row < peptideColumn.Count; row++)
    {
        var peptide = peptideColumn[row];

        // Add() reports false for a peptide that was already accepted; it is evaluated only
        // when the Q-value passes, so rejected rows never mark a peptide as seen.
        if (Convert.ToDouble(qValueColumn[row]) > QValueThreshold || !acceptedPeptides.Add(peptide))
        {
            continue;
        }

        var scanNum = Convert.ToInt32(scanColumn[row]);
        var precursorCharge = Convert.ToInt32(chargeColumn[row]);
        var match = new SpectrumMatch(new Sequence(peptide, aminoAcids), _lcms, scanNum, precursorCharge, _decoy);
        specMatches.Add(match);
    }

    return specMatches;
}
/// <summary>
/// For every row of an IcTda result file, finds the most intense isotope peak of the precursor ion
/// in the precursor spectrum and writes a copy of the result file with an extra
/// "PrecursorIntensity" column.
/// </summary>
/// <remarks>
/// The test is ignored when either the raw file or the result file cannot be found.
/// Rows for which no isotope peaks are returned get an intensity of 0.
/// </remarks>
public void AddMostAbundantIsotopePeakIntensity()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143.raw";
    const string resultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTda.tsv";
    const string newResultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTdaWithIntensities.tsv";

    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + rawFilePath);
    }

    // Both inputs are required; check the result file before the raw file is loaded.
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + resultFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

    var parser = new TsvFileParser(resultFilePath);
    var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
    var scanNums = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
    var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();

    var precursorIntensities = new double[parser.NumData];
    var tolerance = new Tolerance(10);

    for (var i = 0; i < parser.NumData; i++)
    {
        var precursorIon = new Ion(compositions[i], charges[i]);
        var precursorScanNum = run.GetPrecursorScanNum(scanNums[i]);
        var precursorSpec = run.GetSpectrum(precursorScanNum);
        var isotopePeaks = precursorSpec.GetAllIsotopePeaks(precursorIon, tolerance, 0.1);
        if (isotopePeaks == null)
        {
            continue; // intensity stays at the array default of 0
        }

        // Largest intensity among the non-null peaks, never below 0.
        var maxIntensity = 0.0;
        foreach (var peak in isotopePeaks)
        {
            if (peak != null && peak.Intensity > maxIntensity)
            {
                maxIntensity = peak.Intensity;
            }
        }

        precursorIntensities[i] = maxIntensity;
    }

    // Writing
    var rows = parser.GetRows(); // fetched once instead of once per output row
    using (var writer = new StreamWriter(newResultFilePath))
    {
        writer.WriteLine(string.Join("\t", parser.GetHeaders()) + "\t" + "PrecursorIntensity");
        for (var i = 0; i < parser.NumData; i++)
        {
            writer.WriteLine(rows[i] + "\t" + precursorIntensities[i]);
        }
    }

    Console.WriteLine("Done");
}
/// <summary>
/// For every .raw file in the result folder, prints the number of identifications and the largest
/// mass per activation method (CID, ETD, HCD), using the matching "_IcTda.tsv" result file.
/// </summary>
/// <remarks>
/// Rows are consumed until the first row whose QValue exceeds 0.01; later rows are not examined
/// (presumably the result file is sorted by ascending Q-value - confirm).
/// The test is ignored when the result folder does not exist.
/// </remarks>
public void SummarizeAnilResults()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultFolder = @"H:\Research\Anil\Oct28";
    if (!Directory.Exists(resultFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, resultFolder);
    }

    var actMethods = new[] { ActivationMethod.CID, ActivationMethod.ETD, ActivationMethod.HCD };

    Console.WriteLine("Data\tCID\t\tETD\t\tHCD\t");
    Console.WriteLine("\tNumId\tMaxMass\tNumId\tMaxMass\tNumId\tMaxMass");

    foreach (var rawFile in Directory.GetFiles(resultFolder, "*.raw"))
    {
        var datasetName = Path.GetFileNameWithoutExtension(rawFile);
        var resultFile = Path.Combine(Path.GetDirectoryName(rawFile), datasetName + "_IcTda.tsv");

        // Per-activation-method tallies, all starting at zero.
        var idCounts = actMethods.ToDictionary(m => m, m => 0);
        var largestMass = actMethods.ToDictionary(m => m, m => 0.0);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        var tsvParser = new TsvFileParser(resultFile);
        var qValues = tsvParser.GetData("QValue").Select(Convert.ToDouble).ToArray();
        var scanNums = tsvParser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
        var masses = tsvParser.GetData("Mass").Select(Convert.ToDouble).ToArray();

        for (var row = 0; row < qValues.Length; row++)
        {
            if (qValues[row] > 0.01)
            {
                break;
            }

            var spec = run.GetSpectrum(scanNums[row]) as ProductSpectrum;
            Assert.True(spec != null);

            var activation = spec.ActivationMethod;
            idCounts[activation]++;

            if (masses[row] > largestMass[activation])
            {
                largestMass[activation] = masses[row];
            }
        }

        Console.Write(datasetName);
        foreach (var actMethod in actMethods)
        {
            Console.Write("\t" + idCounts[actMethod]);
            Console.Write("\t" + largestMass[actMethod]);
        }
        Console.WriteLine();
    }
}
/// <summary>
/// Loads features from an ms1ft file, registers every feature whose likelihood ratio is not below
/// <c>_minLikelihoodRatio</c> with <c>_lcMsChargeMap</c>, and then builds the mass-to-scan map.
/// </summary>
/// <param name="ms1FtFileName">
/// Tab-separated file with the MonoMass, MinScan, MaxScan, RepScan, MinCharge, MaxCharge and
/// LikelihoodRatio columns.
/// </param>
private void Read(string ms1FtFileName)
{
    var features = new TsvFileParser(ms1FtFileName);

    Func<string, int[]> readIntColumn =
        header => features.GetData(header).Select(s => Convert.ToInt32(s)).ToArray();

    var monoMasses = features.GetData("MonoMass").Select(Convert.ToDouble).ToArray();
    var minScans = readIntColumn("MinScan");
    var maxScans = readIntColumn("MaxScan");
    var repScans = readIntColumn("RepScan");
    var minCharges = readIntColumn("MinCharge");
    var maxCharges = readIntColumn("MaxCharge");
    var likelihoodRatios = features.GetData("LikelihoodRatio").Select(Convert.ToDouble).ToArray();

    var featureCountFiltered = 0;
    for (var row = 0; row < monoMasses.Length; row++)
    {
        // Features scoring below the minimum likelihood ratio are not loaded.
        if (likelihoodRatios[row] < _minLikelihoodRatio)
        {
            continue;
        }

        featureCountFiltered++;
        _lcMsChargeMap.SetMatches(monoMasses[row], minScans[row], maxScans[row], repScans[row], minCharges[row], maxCharges[row]);
    }

    // NOTE: The DMS Analysis Manager looks for this statistic; do not change it
    Console.Write(@"{0}/{1} features loaded...", featureCountFiltered, monoMasses.Length);

    _lcMsChargeMap.CreateMassToScanNumMap();
}
/// <summary>
/// Groups the MS2 scan numbers of an IcTda result file by composition string and prints the number
/// of distinct compositions.
/// </summary>
/// <remarks>
/// Rows are consumed until the first row whose QValue exceeds 0.01; later rows are not examined.
/// The test is ignored when the result file cannot be found.
/// </remarks>
public void TestClusterCentricSearch()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string pfResultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V4_JP_Len500\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
    if (!File.Exists(pfResultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pfResultFilePath);
    }

    var tsvReader = new TsvFileParser(pfResultFilePath);
    var ms2Scans = tsvReader.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
    var compositions = tsvReader.GetData("Composition").ToArray();
    var qValues = tsvReader.GetData("QValue").Select(Convert.ToDouble).ToArray();

    var compScanTable = new Dictionary<string, IList<int>>();
    for (var row = 0; row < qValues.Length; row++)
    {
        if (qValues[row] > 0.01)
        {
            break;
        }

        // Get the scan list of this composition, creating it on first sight.
        var composition = compositions[row];
        IList<int> scansOfComposition;
        if (!compScanTable.TryGetValue(composition, out scansOfComposition))
        {
            scansOfComposition = new List<int>();
            compScanTable.Add(composition, scansOfComposition);
        }

        scansOfComposition.Add(ms2Scans[row]);
    }

    Console.Write("NumCompositions: {0}", compScanTable.Keys.Count);
}
/// <summary>
/// Reads sequence tags from a tab-separated tag file and stores them in <c>_scanToTags</c>,
/// keyed by scan number.
/// </summary>
/// <param name="tagFilePath">File with the ScanNum, SequenceTag, IsPrefix and FlankingMass columns.</param>
/// <remarks>
/// Tags shorter than <c>_minTagLength</c> are skipped. The first tag of a scan is always stored;
/// further tags are appended only while the scan holds fewer than <c>_numTagsPerScan</c> tags.
/// </remarks>
private void Parse(string tagFilePath)
{
    var tagParser = new TsvFileParser(tagFilePath);
    var scanNums = tagParser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
    var tagStrings = tagParser.GetData("SequenceTag").ToArray();
    var prefixFlags = tagParser.GetData("IsPrefix").Select(s => s.Equals("1")).ToArray(); // "1" means prefix tag
    var flankingMasses = tagParser.GetData("FlankingMass").Select(Convert.ToDouble).ToArray();

    for (var row = 0; row < tagParser.NumData; row++)
    {
        var tagString = tagStrings[row];
        if (tagString.Length < _minTagLength)
        {
            continue;
        }

        var scanNum = scanNums[row];
        var tag = new SequenceTag(scanNum, tagString, prefixFlags[row], flankingMasses[row]);

        IList<SequenceTag> tagsOfScan;
        if (!_scanToTags.TryGetValue(scanNum, out tagsOfScan))
        {
            _scanToTags.Add(scanNum, new List<SequenceTag> { tag });
        }
        else if (tagsOfScan.Count < _numTagsPerScan)
        {
            tagsOfScan.Add(tag);
        }
    }
}
/// <summary>
/// Checks, for every row of a rescored result file, that the composition of the sequence plus one
/// water plus the compositions of all listed modifications equals the reported composition.
/// </summary>
/// <remarks>
/// The "Modifications" cell is expected to be enclosed in one leading and one trailing character
/// (they are stripped) and to hold comma-separated entries whose first whitespace-delimited word
/// is the modification name. The test is ignored when the result file cannot be found.
/// </remarks>
public void ValidateIcResultsWithModifications()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1_Rescored.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var parser = new TsvFileParser(resultFilePath);
    var sequences = parser.GetData("Sequence");
    var modifications = parser.GetData("Modifications");
    var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();

    var aaSet = new AminoAcidSet();
    for (var i = 0; i < parser.NumData; i++)
    {
        var sequenceComp = aaSet.GetComposition(sequences[i]) + Composition.H2O;

        // Drop the first and last character of the cell, then split the entries.
        var modsStr = modifications[i].Substring(1, modifications[i].Length - 2);

        var modComposition = Composition.Zero;
        foreach (var modStr in modsStr.Split(','))
        {
            if (modStr.Length == 0)
            {
                continue; // empty list or stray comma
            }

            var modName = modStr.Split()[0];
            var mod = Modification.Get(modName);
            modComposition += mod.Composition;
        }

        var compFromSeqAndMods = sequenceComp + modComposition;
        Assert.True(compFromSeqAndMods.Equals(compositions[i]));
    }
}
/// <summary>
/// Re-scores the identifications of an MS-Align result file and writes each scored row, followed
/// by its new scores, to a tab-separated output file.
/// </summary>
/// <param name="msAlignFilePath">Input file with the "Peptide", "Scan(s)" and "Charge" columns.</param>
/// <param name="outputFilePath">Path of the tab-separated file to write.</param>
/// <remarks>
/// Rows are skipped when no sequence can be extracted from between the dots of the peptide string,
/// when the sequence contains "(", or when the scorer returns no scores.
/// </remarks>
private void Rescore(string msAlignFilePath, string outputFilePath)
{
    var parser = new TsvFileParser(msAlignFilePath);
    var peptideColumn = parser.GetData("Peptide");
    var scanNums = parser.GetData("Scan(s)").Select(s => Convert.ToInt32(s)).ToArray();
    var charges = parser.GetData("Charge").Select(c => Convert.ToInt32(c)).ToArray();

    var rows = parser.GetRows();
    var headers = parser.GetHeaders();

    using (var writer = new StreamWriter(outputFilePath))
    {
        writer.WriteLine("{0}\t{1}", string.Join("\t", headers), IcScores.GetScoreNames());

        for (var i = 0; i < parser.NumData; i++)
        {
            var row = rows[i];
            var seqStr = SimpleStringProcessing.GetStringBetweenDots(peptideColumn[i]);

            //TODO: currently ignore ids with modifications
            var isPlainSequence = seqStr != null && !seqStr.Contains("(");
            if (!isPlainSequence)
            {
                continue;
            }

            var composition = AASet.GetComposition(seqStr);
            var scores = _topDownScorer.GetScores(AminoAcid.ProteinNTerm, seqStr, AminoAcid.ProteinCTerm, composition, charges[i], scanNums[i]);
            if (scores != null)
            {
                writer.WriteLine("{0}\t{1}", row, scores);
            }
        }
    }
}
/// <summary>
/// Measures how well the MS1 isotope-and-charge correlation filter retains confident MS-Align
/// identifications of a QC Shewanella data set, and how many MS2 scans it matches per mass bin.
/// </summary>
/// <remarks>
/// Steps: (1) load the raw file, (2) build the MS1 filter, (3) count the matching MS2 scans for
/// every mass bin between 3000 and 30000, (4) for each result row with E-value of at most 1E-4
/// and an unmodified sequence, print correlation scores and whether the filter kept the scan,
/// (5) print summary statistics. The test is ignored when the raw file or the result file is missing.
/// The stopwatch is stopped after step (3) and never restarted, so the final "Elapsed Time" line
/// reports the duration of step (3) only.
/// </remarks>
public void FilteringEfficiencyQcShew()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Timed section 1: reading the run.
    var sw = new System.Diagnostics.Stopwatch();
    sw.Start();
    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }
    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
    sw.Stop();
    Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

    const int minPrecursorCharge = 3;
    const int maxPrecursorCharge = 30;
    const int tolerancePpm = 10;
    // Tolerance used for the correlation scores computed per identification below.
    var tolerance = new Tolerance(tolerancePpm);

    // Timed section 2: building the MS1 filter.
    sw.Reset();
    sw.Start();
    var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40);
    //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);
    sw.Stop();
    Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

    // From here on the filter is used only through the ISequenceFilter interface.
    ISequenceFilter ms1Filter = ms1BasedFilter;

    // Timed section 3: total number of MS2 scans the filter returns over all mass bins.
    sw.Reset();
    sw.Start();
    const double minProteinMass = 3000.0;
    const double maxProteinMass = 30000.0;
    var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
    var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
    var numComparisons = 0L;
    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
        numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
    }
    sw.Stop();
    Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

    //const string prot =
    //    "ADVFHLGLTKAMLDGATLAIVPGDPERVKRIAELMDNATFLASHREYTSYLAYADGKPVVICSTGIGGPSTSIAVEELAQLGVNTFLRVGTTGAIQPHVNVGDVIVTQASVRLDGASLHFAPMEFPAVANFECTTAMVAACRDAGVEPHIGVTASSDTFYPGQERYDTVTGRVTRRFAGSMKEWQDMGVLNYEMESATLFTMCATQGWRAACVAGVIVNRTQQEIPDEATMKKTEVSAVSIVVAAAKKLLA";
    //var protMass = (new AminoAcidSet().GetComposition(prot) + Composition.H2O).Mass;
    //Console.WriteLine("************ScanNums: " + string.Join("\t", ms1Filter.GetMatchingMs2ScanNums(protMass)));

    // MS-Align identifications used as the reference set.
    const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var tsvReader = new TsvFileParser(resultFilePath);
    var scanNums = tsvReader.GetData("Scan(s)");
    var charges = tsvReader.GetData("Charge");
    var scores = tsvReader.GetData("E-value");
    var sequences = tsvReader.GetData("Peptide");

    //const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_N30_C30.tsv";
    //var tsvReader = new TsvFileParser(resultFilePath);
    //var scanNums = tsvReader.GetData("ScanNum");
    //var charges = tsvReader.GetData("Charge");
    //var scores = tsvReader.GetData("Score");
    //var sequences = tsvReader.GetData("Sequence");

    var aaSet = new AminoAcidSet();

    // seqSet: peptide strings with at least one scan kept by the filter; allSeqSet: all valid peptide strings.
    var seqSet = new HashSet<string>();
    var allSeqSet = new HashSet<string>();
    var numUnfilteredSpecs = 0;
    var totalSpecs = 0;
    for (var i = 0; i < scores.Count; i++)
    {
        // Keep only rows with an E-value of at most 1E-4.
        var score = Convert.ToDouble(scores[i]);
        if (score > 1E-4) continue;
        //if (score < 10) continue;

        var scanNum = Convert.ToInt32(scanNums[i]);
        var charge = Convert.ToInt32(charges[i]);

        // Skip rows without an extractable sequence and rows whose sequence contains "(".
        var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[i]);
        if (sequence == null || sequence.Contains("(")) continue;
        //var sequence = sequences[i];

        var composition = aaSet.GetComposition(sequence) + Composition.H2O;
        var precursorIon = new Ion(composition, charge);

        // The scan must be a product spectrum whose isolation window contains the
        // most abundant isotope m/z of the precursor ion.
        var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
        var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
        if (!isValid) continue;
        ++totalSpecs;

        // corr1: correlation score in the precursor scan.
        var precursorScanNum = run.GetPrecursorScanNum(scanNum);
        var precursorSpec = run.GetSpectrum(precursorScanNum);
        var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // corr2: correlation score in the scan returned by GetNextScanNum(scanNum, 1).
        var nextScanNum = run.GetNextScanNum(scanNum, 1);
        var nextSpec = run.GetSpectrum(nextScanNum);
        var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // corr3: 1 when the MS1 filter lists this scan for the composition's mass, otherwise 0.
        var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
        if (corr3 == 1)
        {
            numUnfilteredSpecs++;
            seqSet.Add(sequences[i]);
        }
        allSeqSet.Add(sequences[i]);

        var corrMax = new[] { corr1, corr2, corr3 }.Max();

        Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax);
    }

    Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
    Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
    // The ratios below are floating-point divisions; with no valid rows they print NaN instead of throwing.
    Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
    Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

    // NOTE(review): sw was stopped after the per-bin section and not restarted, so this prints that section's time.
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
}
/// <summary>
/// Exercises the MS1 correlation and extracted-ion-chromatogram calls for every confident
/// identification in a decoy IC result file.
/// </summary>
/// <remarks>
/// Only the header line is printed: the per-row output is commented out, so the correlation
/// values computed in the loop are currently discarded and the loop only verifies that the calls
/// complete. When a QValue column exists, rows with QValue above 0.01 are skipped.
/// The test is ignored when the result file or the raw file is missing.
/// </remarks>
public void TestMs1Filtering()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultFilePath =
        // @"C:\cygwin\home\kims336\Data\TopDown\raw\CorrMatches_N30\SBEP_STM_001_02272012_Aragon.tsv";
        @"C:\cygwin\home\kims336\Data\TopDown\raw\CorrMatches_N30\SBEP_STM_001_02272012_Aragon.decoy.icresult";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\DataFiles\SBEP_STM_001_02272012_Aragon.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

    //const int minPrecursorCharge = 3;
    //const int maxPrecursorCharge = 30;
    //const int tolerancePpm = 15;
    var tolerance = new Tolerance(15);

    //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 0.7, 40);
    ////var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 20);
    //ISequenceFilter ms1Filter = ms1BasedFilter;

    var tsvReader = new TsvFileParser(resultFilePath);
    var compositions = tsvReader.GetData("Composition");
    var scanNums = tsvReader.GetData("ScanNum");
    var charges = tsvReader.GetData("Charge");
    var qValues = tsvReader.GetData("QValue"); // may be null; the Q-value filter is then skipped
    var scores = tsvReader.GetData("Score");

    //var sequences = tsvReader.GetData("Annotation");

    //var hist = new int[11];

    // Header for the per-row output that is commented out at the end of the loop.
    Console.WriteLine("ScanNum\tScore\tPrecursor\tNext\tSum\tNextIsotope\tLessCharge\tMoreCharge\tMax\tNumXicPeaks");
    for (var i = 0; i < compositions.Count; i++)
    {
        if (qValues != null)
        {
            var qValue = Convert.ToDouble(qValues[i]);
            if (qValue > 0.01) continue;
        }

        var scanNum = Convert.ToInt32(scanNums[i]);
        var composition = Composition.Parse(compositions[i]);
        var charge = Convert.ToInt32(charges[i]);

        var precursorIon = new Ion(composition, charge);

        // The scan must be a product spectrum whose isolation window contains the
        // most abundant isotope m/z of the precursor ion.
        var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
        var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
        if (!isValid) continue;

        var score = Convert.ToDouble(scores[i]);

        // Correlation score in the precursor scan.
        var precursorScanNum = run.GetPrecursorScanNum(scanNum);
        var precursorSpec = run.GetSpectrum(precursorScanNum);
        var preIsotopeCorr = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // Correlation score in the scan returned by GetNextScanNum(scanNum, 1).
        var nextScanNum = run.GetNextScanNum(scanNum, 1);
        var nextSpec = run.GetSpectrum(nextScanNum);
        var nextIsotopeCorr = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // XIC of the most abundant isotope; fall back to the MS2 scan number when the
        // reported apex lies before the first LC scan of the run.
        var xicMostAbundant = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance, scanNum);

        var apexScanNum = xicMostAbundant.GetApexScanNum();
        if (apexScanNum < run.MinLcScan) apexScanNum = scanNum;
        //var sumSpec = run.GetSummedMs1Spectrum(apexScanNum);
        // var apexIsotopeCorr = sumSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;

        // XIC at the most abundant isotope m/z shifted by C13MinusC12 / charge,
        // correlated with the most-abundant-isotope XIC.
        var xicNextIsotope = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz() + Constants.C13MinusC12/charge, tolerance, scanNum);

        var plusOneIsotopeCorr = xicMostAbundant.GetCorrelation(xicNextIsotope);

        // Same composition at charge - 1.
        var precursorIonChargeMinusOne = new Ion(composition, charge - 1);
        var xicChargeMinusOne = run.GetPrecursorExtractedIonChromatogram(precursorIonChargeMinusOne.GetMostAbundantIsotopeMz(), tolerance, scanNum);
        var chargeMinusOneCorr = xicMostAbundant.GetCorrelation(xicChargeMinusOne);

        // Same composition at charge + 1.
        var precursorIonChargePlusOne = new Ion(composition, charge + 1);
        var xicChargePlusOne = run.GetPrecursorExtractedIonChromatogram(precursorIonChargePlusOne.GetMostAbundantIsotopeMz(), tolerance, scanNum);
        var chargePlusOneCorr = xicMostAbundant.GetCorrelation(xicChargePlusOne);

        //var max = new[] {preIsotopeCorr, nextIsotopeCorr, apexIsotopeCorr, plusOneIsotopeCorr, chargeMinusOneCorr, chargePlusOneCorr}.Max();
        //Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}",
        //    scanNum, score, preIsotopeCorr, nextIsotopeCorr, apexIsotopeCorr, plusOneIsotopeCorr, chargeMinusOneCorr, chargePlusOneCorr, max, xicMostAbundant.Count);
    }

    //Console.WriteLine("Histogram");
    //for (var i = 0; i < hist.Length; i++)
    //{
    //    Console.WriteLine("{0:f1}\t{1}", i / 10.0, hist[i]);
    //}
}
/// <summary>
/// Measures how well the MS1 isotope-and-charge correlation filter retains confident
/// identifications of an IC result file, and how many MS2 scans it matches per mass bin.
/// </summary>
/// <remarks>
/// Steps: (1) load the raw file, (2) build the MS1 filter, (3) count the matching MS2 scans for
/// every mass bin between 3000 and 30000, (4) first pass over the result rows: count valid
/// spectra per sequence, (5) second pass: print correlation scores, whether the filter kept the
/// scan, and the per-sequence count, (6) print summary statistics.
/// A row is confident when QValue is at most 0.01, or, if the file has no QValue column, when
/// Score is at least 13. The test is ignored when the raw file or the result file is missing.
/// The stopwatch is stopped after step (3) and never restarted, so the final "Elapsed Time" line
/// reports the duration of step (3) only.
/// </remarks>
public void FilteringEfficiency()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Timed section 1: reading the run.
    var sw = new System.Diagnostics.Stopwatch();
    sw.Start();
    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }
    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
    sw.Stop();
    Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

    const int minPrecursorCharge = 3;
    const int maxPrecursorCharge = 30;
    const int tolerancePpm = 10;
    // Tolerance used for the correlation scores computed per identification below.
    var tolerance = new Tolerance(tolerancePpm);

    // Timed section 2: building the MS1 filter.
    sw.Reset();
    sw.Start();
    //var ms1BasedFilter = new Ms1BasedFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm);
    //
    //var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 20);
    //var ms1BasedFilter = new ProductScorerBasedOnDeconvolutedSpectra(run,
    //    minPrecursorCharge, maxPrecursorCharge,
    //    0, 0,
    //    600.0, 1800.0, new Tolerance(tolerancePpm), null);
    //ms1BasedFilter.CachePrecursorMatchesBinCentric();
    var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.5, 0.5, 0.5, 40);
    //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

    sw.Stop();
    Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

    // From here on the filter is used only through the ISequenceFilter interface.
    ISequenceFilter ms1Filter = ms1BasedFilter;

    // Timed section 3: total number of MS2 scans the filter returns over all mass bins.
    sw.Reset();
    sw.Start();
    const double minProteinMass = 3000.0;
    const double maxProteinMass = 30000.0;
    var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
    var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
    var numComparisons = 0L;
    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
        numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
    }
    sw.Stop();
    Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

    const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon_4PTMs.icresult";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var tsvReader = new TsvFileParser(resultFilePath);
    var compositions = tsvReader.GetData("Composition");
    var scanNums = tsvReader.GetData("ScanNum");
    var charges = tsvReader.GetData("Charge");
    var scores = tsvReader.GetData("Score");
    var qvalues = tsvReader.GetData("QValue"); // may be null; the Score cutoff is used instead
    var sequences = tsvReader.GetData("Sequence");

    // First pass: number of valid, confident spectra per sequence string.
    var sequenceCount = new Dictionary<string, int>();
    for (var i = 0; i < compositions.Count; i++)
    {
        if (qvalues != null)
        {
            var qValue = Convert.ToDouble(qvalues[i]);
            if (qValue > 0.01) continue;
        }
        else
        {
            var score = Convert.ToDouble(scores[i]);
            if (score < 13) continue;
        }
        var scanNum = Convert.ToInt32(scanNums[i]);
        var charge = Convert.ToInt32(charges[i]);
        var composition = Composition.Parse(compositions[i]);
        var precursorIon = new Ion(composition, charge);

        // The scan must be a product spectrum whose isolation window contains the
        // most abundant isotope m/z of the precursor ion.
        var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
        var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
        if (!isValid) continue;

        var sequence = sequences[i];
        int count;
        if (sequenceCount.TryGetValue(sequence, out count)) sequenceCount[sequence] = count + 1;
        else sequenceCount[sequence] = 1;
    }

    //var sequences = tsvReader.GetData("Annotation");

    // Second pass: same row filter as above, so every sequence looked up in sequenceCount exists.
    // seqSet: sequences with at least one scan kept by the filter; allSeqSet: all valid sequences.
    var seqSet = new HashSet<string>();
    var allSeqSet = new HashSet<string>();
    var numUnfilteredSpecs = 0;
    var totalSpecs = 0;
    for (var i = 0; i < compositions.Count; i++)
    {
        if (qvalues != null)
        {
            var qValue = Convert.ToDouble(qvalues[i]);
            if (qValue > 0.01) continue;
        }
        else
        {
            var score = Convert.ToDouble(scores[i]);
            if (score < 13) continue;
        }
        var scanNum = Convert.ToInt32(scanNums[i]);
        var charge = Convert.ToInt32(charges[i]);
        var composition = Composition.Parse(compositions[i]);
        var precursorIon = new Ion(composition, charge);
        var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
        var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
        if (!isValid) continue;
        ++totalSpecs;

        // corr1: correlation score in the precursor scan.
        var precursorScanNum = run.GetPrecursorScanNum(scanNum);
        var precursorSpec = run.GetSpectrum(precursorScanNum);
        var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // corr2: correlation score in the scan returned by GetNextScanNum(scanNum, 1).
        var nextScanNum = run.GetNextScanNum(scanNum, 1);
        var nextSpec = run.GetSpectrum(nextScanNum);
        var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // corr3: 1 when the MS1 filter lists this scan for the composition's mass, otherwise 0.
        var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
        if (corr3 == 1)
        {
            numUnfilteredSpecs++;
            seqSet.Add(sequences[i]);
        }
        allSeqSet.Add(sequences[i]);

        //var xic = run.GetFullPrecursorIonExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance);
        ////xic.Display();
        //var apexScanNum = xic.GetNearestApexScanNum(run.GetPrecursorScanNum(scanNum), false);
        //var apexSpec = run.GetSpectrum(apexScanNum);
        //var corr3 = apexSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        var corrMax = new[] { corr1, corr2, corr3 }.Max();

        Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax, sequenceCount[sequences[i]]);
    }

    Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
    Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons/(double)(maxBinNum-minBinNum+1));
    // The ratios below are floating-point divisions; with no valid rows they print NaN instead of throwing.
    Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs/(double)totalSpecs, numUnfilteredSpecs, totalSpecs);
    Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count/(double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

    // NOTE(review): sw was stopped after the per-bin section and not restarted, so this prints that section's time.
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
}
/// <summary>
/// Reads an MSPathFinder result file and returns the protein-spectrum matches that pass the score
/// and FDR filters, up to a maximum count.
/// </summary>
/// <param name="msPathFinderResultPath">Path of the tab-separated result file.</param>
/// <param name="maxPrsm">Reading stops once the returned list holds this many matches.</param>
/// <param name="minScore">Rows with a score below this value are skipped.</param>
/// <param name="maxScore">Rows with a score above this value are skipped.</param>
/// <returns>The accepted matches in file order.</returns>
/// <remarks>
/// The score is taken from the "#MatchedFragments" column, or from "Score" when that column is
/// absent. When a "QValue" column exists, rows whose value exceeds <c>FdrCutoff</c> are skipped.
/// When "SpecEValue" is absent, the spectral E-value of every match is 0.
/// </remarks>
public List<ProteinSpectrumMatch> ReadMsPathFinderResult(string msPathFinderResultPath, int maxPrsm, double minScore = 3, double maxScore = int.MaxValue)
{
    var parser = new TsvFileParser(msPathFinderResultPath);
    var prsmList = new List<ProteinSpectrumMatch>();

    // Optional columns, or columns with a fallback.
    var scoreColumn = parser.GetData("#MatchedFragments") ?? parser.GetData("Score");
    var qValColumn = parser.GetData("QValue");
    var evalueColumn = parser.GetData("SpecEValue");

    // Required columns, looked up once instead of once per row.
    var sequenceColumn = parser.GetData("Sequence");
    var scanColumn = parser.GetData("Scan");
    var massColumn = parser.GetData("Mass");
    var protNameColumn = parser.GetData("ProteinName");
    var protDescColumn = parser.GetData("ProteinDesc");
    var chargeColumn = parser.GetData("Charge");
    var startColumn = parser.GetData("Start");
    var endColumn = parser.GetData("End");
    var modColumn = parser.GetData("Modifications");
    var preColumn = parser.GetData("Pre");
    var postColumn = parser.GetData("Post");
    var proteinLengthColumn = parser.GetData("ProteinLength");

    for (var i = 0; i < parser.NumData; i++)
    {
        // All fields are parsed before filtering, so a malformed cell fails
        // even on a row that the filters would reject.
        var sequence = sequenceColumn[i];
        var scanNum = int.Parse(scanColumn[i]);
        var mass = double.Parse(massColumn[i]);
        var protName = protNameColumn[i];
        var protDesc = protDescColumn[i];
        var charge = int.Parse(chargeColumn[i]);
        var firstResId = int.Parse(startColumn[i]);
        var lastResId = int.Parse(endColumn[i]);
        var score = double.Parse(scoreColumn[i]);
        var mod = modColumn[i];
        var evalue = (evalueColumn != null) ? double.Parse(evalueColumn[i]) : 0;
        var pre = preColumn[i];
        var post = postColumn[i];
        var proteinLen = int.Parse(proteinLengthColumn[i]);

        if (score < minScore || score > maxScore)
        {
            continue;
        }

        if (qValColumn != null)
        {
            var fdr = double.Parse(qValColumn[i]);
            if (fdr > FdrCutoff)
            {
                continue;
            }
        }

        var sequenceText = GetSequenceText(sequence, mod);

        var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsPathFinder)
        {
            SequenceText = sequenceText,
            Modifications = mod,
            Pre = pre,
            Post = post,
            ProteinLength = proteinLen,
            SpectralEvalue = evalue,
        };

        prsmList.Add(prsm);

        // The limit is checked after adding, so at least one accepted match is returned
        // even when maxPrsm is less than 1.
        if (prsmList.Count >= maxPrsm)
        {
            break;
        }
    }

    return prsmList;
}
/// <summary>
/// Loads a score table from a TSV file into a jagged array indexed as
/// <c>table[massBinIndex][binIndex]</c>.
/// </summary>
/// <param name="fname">Path of the TSV score data file.</param>
/// <returns>
/// An array with one row per entry of <c>_massBins</c>, each holding <c>NumberOfBins</c> values.
/// Column <c>k</c> of the file (header text "k") supplies <c>table[*][k]</c>, and data row
/// <c>i</c> supplies <c>table[i][*]</c>.
/// </returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
/// <exception cref="InvalidDataException">A required column is absent from the file.</exception>
private double[][] LoadTable(string fname)
{
    if (!File.Exists(fname))
    {
        throw new FileNotFoundException("Missing score datafile: " + fname);
    }

    var parser = new TsvFileParser(fname);

    var table = new double[_massBins.Length][];
    for (var i = 0; i < table.Length; i++)
    {
        table[i] = new double[NumberOfBins];
    }

    // With no mass bins there is nothing to read.
    if (table.Length == 0)
    {
        return table;
    }

    // Fill column by column so each column is looked up once, not once per cell.
    for (var k = 0; k < NumberOfBins; k++)
    {
        var columnName = string.Format("{0}", k);
        var colData = parser.GetData(columnName);
        if (colData == null)
        {
            throw new InvalidDataException("Missing column '" + columnName + "' in score datafile: " + fname);
        }

        for (var i = 0; i < table.Length; i++)
        {
            table[i][k] = double.Parse(colData[i]);
        }
    }

    return table;
}
/// <summary>
/// For every row of the TSV result file, scores the listed peptide and a decoy derived from it
/// (reversed, then mutated) with <c>DiaRankScore</c>, and writes the two scores as one
/// tab-separated "Target / Decoy" line of the output file.
/// </summary>
/// <remarks>
/// The test is skipped (ignored) when any input file or the output folder is not available
/// on the current machine.
/// </remarks>
public void DiaRankScore()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string dataFile = @"\\protoapps\UserData\Wilkins\BottomUp\HCD_QCShew\raw\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
    const string tsvFile = @"\\protoapps\UserData\Wilkins\BottomUp\HCD_QCShew\tsv\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.tsv";
    const string scorerParamFile = @"C:\Users\wilk011\Documents\DataFiles\TestFolder\HCD_QExactive_Tryp.txt";
    const string outputFile = @"C:\Users\wilk011\Documents\DataFiles\TestFolder\HCD_QCShew_Score_2.txt";

    if (!File.Exists(dataFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dataFile);
    }

    if (!File.Exists(tsvFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tsvFile);
    }

    // The scorer parameter file and the output folder are machine-specific too;
    // guard them the same way so the test is skipped rather than failing elsewhere.
    // NOTE(review): assumes the DiaRankScore constructor reads this file - confirm.
    if (!File.Exists(scorerParamFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, scorerParamFile);
    }

    var outputFolder = Path.GetDirectoryName(outputFile);
    if (outputFolder == null || !Directory.Exists(outputFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, outputFolder);
    }

    var parser = new TsvFileParser(tsvFile);
    var sequences = parser.GetData("Peptide");
    var charges = parser.GetData("Charge");
    var scans = parser.GetData("ScanNum");

    var lcms = InMemoryLcMsRun.GetLcMsRun(dataFile, 0, 0);
    var rankScorer = new DiaRankScore(scorerParamFile);

    using (var outFile = new StreamWriter(outputFile))
    {
        outFile.WriteLine("Target\tDecoy");
        for (int i = 0; i < sequences.Count; i++)
        {
            string sequenceStr = sequences[i];
            int charge = Convert.ToInt32(charges[i]);
            int scan = Convert.ToInt32(scans[i]);

            var sequence = Sequence.GetSequenceFromMsGfPlusPeptideStr(sequenceStr);

            // Build the decoy: reverse the peptide, then mutate half of its residues.
            var decoySeq = Sequence.GetSequenceFromMsGfPlusPeptideStr(sequenceStr);
            decoySeq.Reverse();
            var decoyStr = decoySeq.Aggregate("", (current, aa) => current + aa);
            decoyStr = SimpleStringProcessing.Mutate(decoyStr, sequence.Count / 2);
            decoySeq = Sequence.GetSequenceFromMsGfPlusPeptideStr(decoyStr);

            var sequenceScore = rankScorer.GetScore(sequence, charge, scan, lcms);
            var decoyScore = rankScorer.GetScore(decoySeq, charge, scan, lcms);
            outFile.WriteLine("{0}\t{1}", sequenceScore, decoyScore);
        }
    }
}
/// <summary>
/// Relates MS1 features (from the ".ms1ft" file) to MSPathFinder identifications and to
/// sequence tags, then prints summary counts and the scan numbers of identifications that
/// were not matched to any feature.
/// </summary>
/// <remarks>
/// An identification matches a feature when its mass lies within the ppm tolerance of the
/// feature's monoisotopic mass and its scan and charge fall inside the feature's
/// [MinScan, MaxScan] and [MinCharge, MaxCharge] ranges (all bounds inclusive).
/// The test is skipped when any of the required files is missing.
/// </remarks>
public void TestFeatureIdMatching()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var resultParser = new MsPathFinderParser(resultFilePath);
    const double qValueThreshold = 0.01;
    const double tolerancePpm = 13;

    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
    if (!File.Exists(rawFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFileName);

    // TakeWhile stops at the first ID above the threshold.
    // NOTE(review): this presumes GetIdList() is ordered by q-value - confirm.
    var idList = resultParser.GetIdList().TakeWhile(id => id.QValue <= qValueThreshold).OrderBy(id => id.Mass).ToList();
    var idMassList = idList.Select(id => id.Mass).ToList();
    var idFlag = new bool[idList.Count];

    // Parse sequence tags
    var tagFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
    const int minTagLength = 6;
    const int numProtMatches = 4;
    // Alternative database: @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta"
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";

    if (!File.Exists(tagFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);
    var tagParser = new SequenceTagParser(tagFileName, minTagLength);

    var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
    if (!File.Exists(featureFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, featureFileName);
    }

    var featureParser = new TsvFileParser(featureFileName);
    var minScan = featureParser.GetData("MinScan").Select(s => Convert.ToInt32(s)).ToArray();
    var maxScan = featureParser.GetData("MaxScan").Select(s => Convert.ToInt32(s)).ToArray();
    var minCharge = featureParser.GetData("MinCharge").Select(s => Convert.ToInt32(s)).ToArray();
    var maxCharge = featureParser.GetData("MaxCharge").Select(s => Convert.ToInt32(s)).ToArray();
    var monoMass = featureParser.GetData("MonoMass").Select(Convert.ToDouble).ToArray();

    // One predicate for both search directions. The downward search used to apply
    // exclusive scan bounds while the upward search used inclusive ones.
    Func<MsPathFinderId, int, bool> isWithinFeature = (id, f) =>
        id.Scan >= minScan[f] && id.Scan <= maxScan[f] &&
        id.Charge >= minCharge[f] && id.Charge <= maxCharge[f];

    var numFeaturesWithId = 0;
    var numFeaturesWithMs2 = 0;
    var numFeaturesWithTags = 0;
    var numFeaturesWithMatchingTags = 0;
    var numFeaturesWithTwoOrMoreMatchingTags = 0;
    var numFeaturesWithNoIdAndMatchingTags = 0;

    for (var i = 0; i < featureParser.NumData; i++)
    {
        var mass = monoMass[i];

        // Find Id: locate the insertion point of the feature mass in the mass-sorted
        // ID list, then walk outwards in both directions while inside the tolerance window.
        var tolDa = new Tolerance(tolerancePpm).GetToleranceAsDa(mass, 1);
        var minMass = mass - tolDa;
        var maxMass = mass + tolDa;
        var index = idMassList.BinarySearch(mass);
        if (index < 0)
        {
            index = ~index;
        }

        var hasId = false;

        // go down
        for (var curIndex = index - 1; curIndex >= 0; --curIndex)
        {
            var curId = idList[curIndex];
            if (curId.Mass < minMass)
            {
                break;
            }

            if (isWithinFeature(curId, i))
            {
                hasId = true;
                idFlag[curIndex] = true;
            }
        }

        // go up
        for (var curIndex = index; curIndex < idList.Count; ++curIndex)
        {
            var curId = idList[curIndex];
            if (curId.Mass > maxMass)
            {
                break;
            }

            if (isWithinFeature(curId, i))
            {
                hasId = true;
                idFlag[curIndex] = true;
            }
        }

        if (hasId)
        {
            ++numFeaturesWithId;
        }

        // Find MS2 scans whose isolation window contains the feature's most abundant
        // isotope peak at the charge implied by the window's target m/z.
        var tags = new List<SequenceTag>();
        var hasMs2 = false;
        for (var scanNum = minScan[i]; scanNum <= maxScan[i]; scanNum++)
        {
            var isolationWindow = run.GetIsolationWindow(scanNum);
            if (isolationWindow == null)
            {
                continue;
            }

            var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
            var charge = (int)Math.Round(mass / isolationWindowTargetMz);
            if (charge < minCharge[i] || charge > maxCharge[i])
            {
                continue;
            }

            var mz = Ion.GetIsotopeMz(mass, charge, Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex);
            if (isolationWindow.Contains(mz))
            {
                tags.AddRange(tagParser.GetSequenceTags(scanNum));
                hasMs2 = true;
            }
        }

        if (hasMs2)
        {
            ++numFeaturesWithMs2;
        }

        if (tags.Any())
        {
            ++numFeaturesWithTags;
        }

        // Count, per protein, how many tag hits it received for this feature.
        var protHist = new Dictionary<string, int>();
        var hasMatchedTag = false;
        foreach (var tag in tags)
        {
            var matchedProteins = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence)
                .Select(idx => fastaDb.GetProteinName(idx))
                .ToArray();
            if (!matchedProteins.Any())
            {
                continue;
            }

            hasMatchedTag = true;
            foreach (var protein in matchedProteins)
            {
                int num;
                if (protHist.TryGetValue(protein, out num))
                {
                    protHist[protein] = num + 1;
                }
                else
                {
                    protHist[protein] = 1;
                }
            }
        }

        if (hasMatchedTag)
        {
            ++numFeaturesWithMatchingTags;
            if (!hasId)
            {
                ++numFeaturesWithNoIdAndMatchingTags;
            }
        }

        if (protHist.Any())
        {
            var maxOcc = protHist.Values.Max();
            if (maxOcc >= numProtMatches)
            {
                ++numFeaturesWithTwoOrMoreMatchingTags;
            }
        }
    }

    Console.WriteLine("NumFeatures: {0}", featureParser.NumData);
    Console.WriteLine("NumId: {0}", idList.Count);
    Console.WriteLine("NumFeaturesWithId: {0} ({1})", numFeaturesWithId, numFeaturesWithId / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMs2: {0} ({1})", numFeaturesWithMs2, numFeaturesWithMs2 / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithTag: {0} ({1})", numFeaturesWithTags, numFeaturesWithTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMatchedTag: {0} ({1})", numFeaturesWithMatchingTags, numFeaturesWithMatchingTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMoreThanOneMatchedTag: {0} ({1})", numFeaturesWithTwoOrMoreMatchingTags, numFeaturesWithTwoOrMoreMatchingTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithNoIdAndMatchedTag: {0} ({1})", numFeaturesWithNoIdAndMatchingTags, numFeaturesWithNoIdAndMatchingTags / (float)featureParser.NumData);

    // List the scans of identifications that no feature accounted for.
    for (var i = 0; i < idFlag.Length; i++)
    {
        if (!idFlag[i])
        {
            Console.WriteLine(idList[i].Scan);
        }
    }
}
/// <summary>
/// Prints, for every MS-GF+ match passing a PSM-level FDR of 0.01, the score stored for the
/// same scan/peptide pair in the IC result file, or -1 when that pair is absent.
/// </summary>
/// <remarks>The test is skipped when either result file is missing.</remarks>
public void TestInitialScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string icResultPath = @"C:\cygwin\home\kims336\Data\QCShewQE\Ic_NTT2_03_NoMod_NoRescoring\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28_IcTarget.tsv";
    if (!File.Exists(icResultPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, icResultPath);
    }

    // Index the IC scores by "scan:peptide".
    var icParser = new TsvFileParser(icResultPath);
    var scanNumbers = icParser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
    var peptideColumn = icParser.GetData("Sequence");
    var scoreValues = icParser.GetData("Score").Select(s => Convert.ToInt32(s)).ToArray();

    var scoreByScanAndPeptide = new Dictionary<string, int>();
    var rowCount = icParser.NumData;
    for (var row = 0; row < rowCount; row++)
    {
        var key = scanNumbers[row] + ":" + peptideColumn[row];
        scoreByScanAndPeptide.Add(key, scoreValues[row]);
    }

    const string msgfPlusResultPath = @"C:\cygwin\home\kims336\Data\QCShewQE\NoMod.tsv";
    if (!File.Exists(msgfPlusResultPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, msgfPlusResultPath);
    }

    var msgfPlusResults = new MsGfResults(msgfPlusResultPath);
    var matches = msgfPlusResults.GetMatchesAtPsmFdr(0.01);

    Console.WriteLine("ScanNum\tPeptide\tSpecEValue\tIcScore");
    foreach (var match in matches)
    {
        var scanNum = match.ScanNum;
        var peptide = match.Peptide;
        var specEValue = match.SpecEValue;

        int storedScore;
        var found = scoreByScanAndPeptide.TryGetValue(scanNum + ":" + peptide, out storedScore);

        Console.WriteLine("{0}\t{1}\t{2}\t{3}", scanNum, peptide, specEValue, found ? storedScore : -1);
    }
}
/// <summary>
/// Reads protein-spectrum matches from an MS-GF+ TSV result file.
/// </summary>
/// <param name="msgfResultPath">Path of the TSV result file.</param>
/// <param name="maxPrsm">Reading stops as soon as the result list holds at least this many matches (checked after each addition).</param>
/// <returns>The accepted matches, in file order.</returns>
/// <remarks>
/// Only the first row of each run of consecutive rows sharing a scan number is considered;
/// if that row is rejected by the FDR filter, the remaining rows of the run are still skipped.
/// Rows whose "QValue" exceeds <c>FdrCutoff</c> are rejected. The mass is derived from the
/// precursor m/z and charge; residue positions and protein description are not available
/// and are set to 0 and the empty string.
/// </remarks>
public List<ProteinSpectrumMatch> ReadMsGfPlusResult(string msgfResultPath, int maxPrsm)
{
    var parser = new TsvFileParser(msgfResultPath);
    var prsmList = new List<ProteinSpectrumMatch>();

    // Resolve every column once instead of once per row.
    var peptideColumn = parser.GetData("Peptide");
    var scanColumn = parser.GetData("Scan");
    var precursorMzColumn = parser.GetData("PrecursorMZ");
    var proteinColumn = parser.GetData("Protein");
    var scoreColumn = parser.GetData("MSGFScore");
    var chargeColumn = parser.GetData("Charge");
    var qValueColumn = parser.GetData("QValue");

    const string protDesc = "";
    const int firstResId = 0;
    const int lastResId = 0;

    var prevScanNum = -1;
    for (var i = 0; i < parser.NumData; i++)
    {
        var sequence = peptideColumn[i];
        var scanNum = int.Parse(scanColumn[i]);

        if (prevScanNum == scanNum)
        {
            continue;
        }

        prevScanNum = scanNum;

        var mz = double.Parse(precursorMzColumn[i]);
        var protName = proteinColumn[i];
        var score = double.Parse(scoreColumn[i]);
        var charge = int.Parse(chargeColumn[i]);

        var seq = Sequence.GetSequenceFromMsGfPlusPeptideStr(sequence);
        var sequenceText = GetSequenceText(seq);
        var mass = (mz - Constants.Proton) * charge;

        var fdr = Double.Parse(qValueColumn[i]);
        if (fdr > FdrCutoff)
        {
            continue;
        }

        var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsGfPlus)
        {
            SequenceText = sequenceText,
        };

        prsmList.Add(prsm);

        if (prsmList.Count >= maxPrsm)
        {
            break;
        }
    }

    return prsmList;
}
/// <summary>
/// For each peptide reported by both the DDA and the DIA summary (peptide-level q-value
/// threshold 0.01), prints the scan number of its first occurrence in each summary together
/// with the values returned by <c>RtFromScanNum</c> for those scans.
/// </summary>
/// <remarks>
/// The DIA reader is selected by the file name (without extension) found in the
/// "#SpecFile" column of the DIA summary.
/// </remarks>
public void CompareRt()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Alternative (Q-Exactive) summaries:
    //   @"D:\Research\Data\UW\QExactive\DDA_All_Summary.tsv"
    //   @"D:\Research\Data\UW\QExactive\DIA_All_Summary.tsv"
    // Fusion summaries:
    const string qeDdaResult = @"D:\Research\Data\UW\Fusion\DDA_Summary.tsv";
    const string qeDiaResult = @"D:\Research\Data\UW\Fusion\DIA_Summary.tsv";

    const string specFileDda = @"D:\Research\Data\UW\QExactive\82593_lv_mcx_DDA.raw";
    var ddaReader = new XCaliburReader(specFileDda);

    // One reader per DIA raw file, keyed by file name without extension.
    var diaReaders = new Dictionary<string, XCaliburReader>();
    foreach (var diaRawPath in Directory.GetFiles(@"D:\Research\Data\UW\QExactive\", "*_DIA_*.raw"))
    {
        var key = Path.GetFileNameWithoutExtension(diaRawPath);
        if (key != null)
        {
            diaReaders.Add(key, new XCaliburReader(diaRawPath));
        }
    }

    var ddaSummary = new TsvFileParser(qeDdaResult);
    var diaSummary = new TsvFileParser(qeDiaResult);

    const double pepQValueThreshold = 0.01;
    var venn = new VennDiagram<string>(
        ddaSummary.GetPeptides(pepQValueThreshold),
        diaSummary.GetPeptides(pepQValueThreshold));
    var sharedPeptides = venn.Intersection;

    var ddaPeptides = ddaSummary.GetData("Peptide");
    var ddaScans = ddaSummary.GetData("ScanNum");
    var diaPeptides = diaSummary.GetData("Peptide");
    var diaScans = diaSummary.GetData("ScanNum");
    var diaSpecFiles = diaSummary.GetData("#SpecFile");

    Console.WriteLine("Peptide\tScanNum1\tScanNum2\tRt1\tRt2");
    foreach (var peptide in sharedPeptides)
    {
        var ddaRow = ddaPeptides.IndexOf(peptide);
        var diaRow = diaPeptides.IndexOf(peptide);

        var ddaScan = Convert.ToInt32(ddaScans[ddaRow]);
        var diaScan = Convert.ToInt32(diaScans[diaRow]);

        var diaFileKey = Path.GetFileNameWithoutExtension(diaSpecFiles[diaRow]);
        var diaReader = diaReaders[diaFileKey];

        Console.WriteLine(
            "{0}\t{1}\t{2}\t{3}\t{4}",
            peptide.Replace("C+57.021", "C"),
            ddaScan,
            diaScan,
            ddaReader.RtFromScanNum(ddaScan),
            diaReader.RtFromScanNum(diaScan));
    }
}
/// <summary>
/// Compares the peptides (peptide-level q-value threshold 0.01) of the UMC-based PE-MMR
/// result with those of the IPA result. Prints the set sizes, the Venn-diagram region
/// sizes, and every peptide found only in the IPA result.
/// </summary>
public void GenerateVennDiagramsPeMmr()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Alternative inputs:
    //   No PE-MMR:               @"D:\Research\Data\PEMMR\iTRAQ_N33T34_10ug_100cm_300min_C2_061213.tsv"
    //   PE-MMR, scan-based FDR:  @"D:\Research\Data\PEMMR\NewSpectra\iTRAQ_N33T34_10ug_100cm_300min_C2_061213_MX_PEMMR_UMCID_ScanFDR.tsv"

    // UMC based FDR
    const string umcBasedPeMmr = @"D:\Research\Data\PEMMR\NewSpectra\iTRAQ_N33T34_10ug_100cm_300min_C2_061213_MX_PEMMR_UMCID_UMCFDR.tsv";

    // IPA
    const string ipa = @"D:\Research\Data\PEMMR\Ox\IPA_Summary_TargetOnly.tsv";

    const string resultPath1 = umcBasedPeMmr;
    const string resultPath2 = ipa;

    var result1 = new TsvFileParser(resultPath1);
    var result2 = new TsvFileParser(resultPath2);

    const double pepQValueThreshold = 0.01;
    var vennDiagram = new VennDiagram<string>(result1.GetPeptides(pepQValueThreshold),
                                              result2.GetPeptides(pepQValueThreshold));

    // First line: total size of set 1, intersection, total size of set 2.
    Console.WriteLine("{0}\t{1}\t{2}",
                      vennDiagram.Set1Only.Count + vennDiagram.Intersection.Count,
                      vennDiagram.Intersection.Count,
                      vennDiagram.Set2Only.Count + vennDiagram.Intersection.Count);

    // Second line: sizes of the three Venn regions.
    Console.WriteLine("{0}\t{1}\t{2}",
                      vennDiagram.Set1Only.Count,
                      vennDiagram.Intersection.Count,
                      vennDiagram.Set2Only.Count);

    foreach (var peptide in vennDiagram.Set2Only)
    {
        Console.WriteLine(peptide);
    }
}
/// <summary>
/// Reads an MSPathFinder TSV result file and appends one <c>MsPathFinderId</c> per data row
/// to <c>_idList</c>; the first ID seen for each scan number is also stored in
/// <c>_scanNumToPrSm</c>.
/// </summary>
/// <param name="fileName">Path of the TSV result file.</param>
/// <returns>
/// <c>false</c> (before anything is stored) when some "Pre" or "Post" value is not exactly
/// one character long; otherwise <c>true</c>.
/// </returns>
private bool Parse(string fileName)
{
    var parser = new TsvFileParser(fileName);

    // Column readers; each call converts a whole column eagerly.
    Func<string, int[]> readInts =
        header => parser.GetData(header).Select(s => Convert.ToInt32(s)).ToArray();
    Func<string, double[]> readDoubles =
        header => parser.GetData(header).Select(s => Convert.ToDouble(s)).ToArray();
    Func<string, char[]> readSingleChars =
        header => parser.GetData(header).Where(s => s.Length == 1).Select(s => s[0]).ToArray();

    var scans = readInts("Scan");

    // Entries that are not exactly one character are dropped by the reader, so a length
    // mismatch means the column held at least one such entry.
    var preResidues = readSingleChars("Pre");
    if (preResidues.Length != parser.NumData)
    {
        return false;
    }

    var sequences = parser.GetData("Sequence").ToArray();

    var postResidues = readSingleChars("Post");
    if (postResidues.Length != parser.NumData)
    {
        return false;
    }

    var modifications = parser.GetData("Modifications").ToArray();
    var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
    var proteinNames = parser.GetData("ProteinName").ToArray();
    var proteinDescriptions = parser.GetData("ProteinDesc").ToArray();
    var proteinLengths = readInts("ProteinLength");
    var starts = readInts("Start");
    var ends = readInts("End");
    var charges = readInts("Charge");
    var mostAbundantIsotopeMzs = readDoubles("MostAbundantIsotopeMz");
    var masses = readDoubles("Mass");
    var matchedFragmentCounts = readInts("#MatchedFragments");
    var qValues = readDoubles("QValue");
    var pepQValues = readDoubles("PepQValue");

    var rowCount = parser.NumData;
    for (var row = 0; row < rowCount; row++)
    {
        var id = new MsPathFinderId(
            scans[row],
            preResidues[row],
            sequences[row],
            postResidues[row],
            modifications[row],
            compositions[row],
            proteinNames[row],
            proteinDescriptions[row],
            proteinLengths[row],
            starts[row],
            ends[row],
            charges[row],
            mostAbundantIsotopeMzs[row],
            masses[row],
            matchedFragmentCounts[row],
            qValues[row],
            pepQValues[row]);

        _idList.Add(id);

        // Keep only the first ID encountered for a scan.
        if (_scanNumToPrSm.ContainsKey(scans[row]))
        {
            continue;
        }

        _scanNumToPrSm.Add(scans[row], id);
    }

    return true;
}
/// <summary>
/// Re-scores the target and decoy IC results of one dataset: for every row the sequence is
/// scored with <c>InformedTopDownScorer</c>, a spectral E-value is computed from a generating
/// function over the deconvoluted MS2 spectrum, and the first 15 columns of the row plus the
/// new score and E-value are written to a "_Rescored.tsv" file.
/// </summary>
/// <remarks>
/// Rows are processed in parallel; output lines are buffered per row index and written in
/// the original row order. The test is skipped when the spectrum file or a result file
/// is missing.
/// </remarks>
public void TestCompositeScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Alternative input: @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SpecFiles\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw"
    const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        oxM,
        acetylN
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
    const double filteringWindowSize = 1.1;
    const int isotopeOffsetTolerance = 2;
    var tolerance = new Tolerance(10);
    const int minCharge = 1;
    const int maxCharge = 20;
    var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
    var aminoAcidSet = new AminoAcidSet();
    var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

    var fileExt = new[] { "IcTarget", "IcDecoy" };
    foreach (var ext in fileExt)
    {
        var resultFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv", ext);
        if (!File.Exists(resultFileName))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFileName);
        }

        var parser = new TsvFileParser(resultFileName);
        var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
        var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
        var protSequences = parser.GetData("Sequence").ToArray();
        var modStrs = parser.GetData("Modifications").ToArray();
        var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
        var protMass = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

        // Fetch the raw rows once rather than once per row inside the parallel loop.
        var rows = parser.GetRows();

        var outputFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv", ext);
        using (var writer = new StreamWriter(outputFileName))
        {
            writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

            var lines = new string[parser.NumData];

            Parallel.For(0, parser.NumData, i =>
            {
                var scan = scans[i];
                var charge = charges[i];
                var protSequence = protSequences[i];
                var modStr = modStrs[i];
                var sequence = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);

                // The stored composition includes one water relative to the residue sum.
                Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));

                var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;
                Assert.True(ms2Spec != null);

                var scores = scorer.GetScores(sequence, charge, scan);

                var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);
                var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance, comparer);
                var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                var gf = new GeneratingFunction(graph);
                gf.ComputeGeneratingFunction();

                var specEvalue = gf.GetSpectralEValue(scores.Score);

                var items = rows[i].Split('\t');
                var newRowStr = string.Join("\t", items, 0, 15);

                // Each iteration writes only its own slot, so no lock is needed.
                lines[i] = string.Format("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
            });

            foreach (var line in lines)
            {
                writer.WriteLine(line);
            }
        }

        Console.WriteLine("Done");
    }
}
/// <summary>
/// Compares IPA peptide identifications (q-value threshold 0.005, with "C+57.021" rewritten
/// to "C") against the union of the "Sequence" columns of all DIA IC target result files.
/// Prints every IPA peptide missing from the IC results, followed by the counts.
/// </summary>
/// <remarks>The test is skipped when the result folder or the IPA summary is missing.</remarks>
public void CompareIpaIc()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultDir = @"D:\Research\Data\UW\QExactive\Ic_NTT2_03";
    if (!Directory.Exists(resultDir))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, resultDir);
    }

    var targetPeptides = new HashSet<string>();
    foreach (var icResultFilePath in Directory.GetFiles(resultDir, "*DIA*IcTarget.tsv"))
    {
        var icParser = new TsvFileParser(icResultFilePath);
        foreach (var peptide in icParser.GetData("Sequence"))
        {
            targetPeptides.Add(peptide);
        }
    }

    const string ipaResultPath = @"D:\Research\Data\UW\QExactive\DIA_All_Summary.tsv";
    if (!File.Exists(ipaResultPath))
    {
        // Report the missing path (the message used to repeat the method name here).
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, ipaResultPath);
    }

    var parser = new TsvFileParser(ipaResultPath);
    var ipaPeptides = parser.GetPeptides(0.005).Select(p => p.Replace("C+57.021", "C"));

    var ipaOnly = 0;
    var both = 0;
    foreach (var ipaPeptide in ipaPeptides)
    {
        if (targetPeptides.Contains(ipaPeptide))
        {
            ++both;
        }
        else
        {
            ++ipaOnly;
            Console.WriteLine(ipaPeptide);
        }
    }

    Console.WriteLine("Both: {0}, IpaOnly: {1}, Sum: {2}", both, ipaOnly, both + ipaOnly);
}
/// <summary>
/// Looks up every sequence tag (length at least 3) of a ".seqtag" file in the FASTA
/// database and reports: the average number of protein hits per scan, a histogram of the
/// number of distinct proteins hit per scan, and how many identified scans (q-value at most
/// 0.01) had a tag that hit the identified protein.
/// </summary>
/// <remarks>
/// The tag file is expected to have a header line followed by tab-separated rows with at
/// least three fields, the scan number first and the tag second. Rows of one scan are
/// assumed to be contiguous, since a new per-scan protein set starts whenever the scan
/// number changes. The test is skipped when any input file is missing.
/// </remarks>
public void CountMatchedProteins()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const int minTagLength = 3;

    var scanToProtein = new Dictionary<int, string>();
    var idTag = new Dictionary<int, bool>();

    const string resultFilePath = @"H:\Research\ProMex\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var parser = new TsvFileParser(resultFilePath);
    var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
    var proteinNames = parser.GetData("ProteinName").ToArray();
    var qValues = parser.GetData("QValue").Select(Convert.ToDouble).ToArray();

    // Stops at the first row above the threshold.
    // NOTE(review): this presumes rows are ordered by q-value - confirm.
    for (var i = 0; i < qValues.Length; i++)
    {
        if (qValues[i] > 0.01)
        {
            break;
        }

        scanToProtein.Add(scans[i], proteinNames[i]);
        idTag.Add(scans[i], false);
    }

    const string rawFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    // NOTE(review): the loaded run is not used below; kept so the test's behavior is unchanged.
    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

    // Alternative databases:
    //   @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta"
    //   @"D:\Research\Data\CommonContaminants\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta"
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);
    Console.WriteLine("Sequence length: {0}", fastaDb.GetSequence().Length);

    const string tagFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.seqtag";
    if (!File.Exists(tagFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFilePath);
    }

    // Histogram: number of distinct proteins hit by a scan -> number of such scans.
    var hist = new Dictionary<int, int>();
    Action<HashSet<string>> recordScan = proteins =>
    {
        if (proteins == null)
        {
            return;
        }

        int numOcc;
        if (hist.TryGetValue(proteins.Count, out numOcc))
        {
            hist[proteins.Count] = numOcc + 1;
        }
        else
        {
            hist.Add(proteins.Count, 1);
        }
    };

    var scanSet = new HashSet<int>();
    HashSet<string> proteinSetForThisScan = null;
    var prevScan = -1;
    var totalNumMatches = 0L;

    // Skip the header line and stream the rest instead of loading the whole file.
    foreach (var line in File.ReadLines(tagFilePath).Skip(1))
    {
        var token = line.Split('\t');
        if (token.Length < 3)
        {
            continue;
        }

        var scan = Convert.ToInt32(token[0]);

        // TryGetValue leaves proteinId null when the scan has no identification.
        string proteinId;
        scanToProtein.TryGetValue(scan, out proteinId);

        if (scan != prevScan)
        {
            // A new scan begins: close the previous one.
            recordScan(proteinSetForThisScan);
            prevScan = scan;
            proteinSetForThisScan = new HashSet<string>();
        }

        scanSet.Add(scan);

        var tag = token[1];
        if (tag.Length < minTagLength)
        {
            continue;
        }

        if (proteinSetForThisScan == null)
        {
            continue;
        }

        var numMatchesForThisTag = 0;
        foreach (var matchedProtein in searchableDb.FindAllMatchedSequenceIndices(tag)
                     .Select(index => fastaDb.GetProteinName(index)))
        {
            proteinSetForThisScan.Add(matchedProtein);
            ++numMatchesForThisTag;

            if (proteinId != null && matchedProtein.Equals(proteinId))
            {
                idTag[scan] = true;
            }
        }

        totalNumMatches += numMatchesForThisTag;
    }

    // Close the last scan.
    recordScan(proteinSetForThisScan);

    Console.WriteLine("AvgNumMatches: {0}", totalNumMatches / (float)scanSet.Count);
    Console.WriteLine("Histogram:");
    foreach (var entry in hist.OrderBy(e => e.Key))
    {
        Console.WriteLine("{0}\t{1}", entry.Key, entry.Value);
    }

    Console.WriteLine("NumId: {0}", idTag.Count);
    Console.WriteLine("NumIdByTag: {0}", idTag.Values.Count(v => v));
}
/// <summary>
/// Reads protein-spectrum matches from an MS-Align result table (TSV).
/// </summary>
/// <param name="msAlignResultTablePath">Path of the result table.</param>
/// <param name="maxPrsm">Reading stops as soon as the result list holds at least this many matches (checked after each addition).</param>
/// <returns>The accepted matches, in file order.</returns>
/// <remarks>
/// Rows whose "FDR" value exceeds <c>FdrCutoff</c> are skipped. "Protein_name" is split at
/// its first space into name and description; without a space the whole value is used for
/// both. The score is the "#matched_fragment_ions" value.
/// </remarks>
public List<ProteinSpectrumMatch> ReadMsAlignResult(string msAlignResultTablePath, int maxPrsm)
{
    var parser = new TsvFileParser(msAlignResultTablePath);
    var prsmList = new List<ProteinSpectrumMatch>();

    // Resolve every column once instead of once per row.
    var peptideColumn = parser.GetData("Peptide");
    var scanColumn = parser.GetData("Scan(s)");
    var massColumn = parser.GetData("Precursor_mass");
    var proteinColumn = parser.GetData("Protein_name");
    var firstResidueColumn = parser.GetData("First_residue");
    var lastResidueColumn = parser.GetData("Last_residue");
    var matchedIonsColumn = parser.GetData("#matched_fragment_ions");
    var chargeColumn = parser.GetData("Charge");
    var evalueColumn = parser.GetData("E-value");
    var fdrColumn = parser.GetData("FDR");

    for (var i = 0; i < parser.NumData; i++)
    {
        var sequence = peptideColumn[i];
        var scanNum = int.Parse(scanColumn[i]);
        var mass = double.Parse(massColumn[i]);

        var protNameDesc = proteinColumn[i];
        var k = protNameDesc.IndexOf(' ');
        var protName = (k < 0) ? protNameDesc : protNameDesc.Substring(0, k);
        var protDesc = (k < 0) ? protNameDesc : protNameDesc.Substring(k + 1);

        var firstResId = int.Parse(firstResidueColumn[i]);
        var lastResId = int.Parse(lastResidueColumn[i]);
        var score = double.Parse(matchedIonsColumn[i]);
        var sequenceText = peptideColumn[i];
        var charge = int.Parse(chargeColumn[i]);
        var evalue = double.Parse(evalueColumn[i]);

        var fdr = Double.Parse(fdrColumn[i]);
        if (fdr > FdrCutoff)
        {
            continue;
        }

        var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsAlign)
        {
            SequenceText = sequenceText,
            SpectralEvalue = evalue,
        };

        prsmList.Add(prsm);

        if (prsmList.Count >= maxPrsm)
        {
            break;
        }
    }

    return prsmList;
}
/// <summary>
/// Writes every row of an MSPathFinder result as one tab-separated, "\n"-terminated line:
/// scan, sequence, modifications, mass, protein name, protein description, first residue,
/// last residue, score, q-value.
/// </summary>
/// <param name="writer">Destination; numeric values are formatted with the writer's format provider.</param>
/// <param name="parser">Parsed result file. The score comes from "#MatchedFragments", falling back to "Score".</param>
/// <param name="fastaDb">Database used to look up the description of each protein name.</param>
/// <remarks>When the result has no "QValue" column, "0" is written as the q-value.</remarks>
private void OutputMergedResult(StreamWriter writer, TsvFileParser parser, FastaDatabase fastaDb)
{
    // Resolve every column once instead of once per row.
    var scoreColumn = parser.GetData("#MatchedFragments") ?? parser.GetData("Score");
    var qValColumn = parser.GetData("QValue");
    var sequenceColumn = parser.GetData("Sequence");
    var scanColumn = parser.GetData("Scan");
    var massColumn = parser.GetData("Mass");
    var protNameColumn = parser.GetData("ProteinName");
    var startColumn = parser.GetData("Start");
    var endColumn = parser.GetData("End");
    var modColumn = parser.GetData("Modifications");

    for (var i = 0; i < parser.NumData; i++)
    {
        var sequence = sequenceColumn[i];
        var scanNum = int.Parse(scanColumn[i]);
        var mass = double.Parse(massColumn[i]);
        var protName = protNameColumn[i];
        var protDesc = fastaDb.GetProteinDescription(protName);

        var firstResId = int.Parse(startColumn[i]);
        var lastResId = int.Parse(endColumn[i]);
        var score = double.Parse(scoreColumn[i]);
        var mod = modColumn[i];
        var qvalue = (qValColumn != null) ? qValColumn[i] : "0";

        // Explicit "\n" (not WriteLine) keeps the line terminator platform-independent.
        writer.Write(
            "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\n",
            scanNum, sequence, mod, mass, protName, protDesc, firstResId, lastResId, score, qvalue);
    }
}
/// <summary>
/// For each peptide reported by both the Fusion DDA and DIA summaries (peptide-level
/// q-value threshold 0.01), prints the scan number of its first occurrence in each summary
/// together with the values returned by <c>RtFromScanNum</c> for those scans.
/// </summary>
public void CompareRtFusion()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Fusion
    const string qeDdaResult = @"D:\Research\Data\UW\Fusion\DDA_Summary.tsv";
    const string qeDiaResult = @"D:\Research\Data\UW\Fusion\DIA_Summary.tsv";

    const string specFileDda = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
    var ddaReader = new XCaliburReader(specFileDda);

    const string specFileDia = @"D:\Research\Data\UW\Fusion\WT_D_DIA_130412091220.raw";
    var diaReader = new XCaliburReader(specFileDia);

    var ddaSummary = new TsvFileParser(qeDdaResult);
    var diaSummary = new TsvFileParser(qeDiaResult);

    const double pepQValueThreshold = 0.01;
    var venn = new VennDiagram<string>(
        ddaSummary.GetPeptides(pepQValueThreshold),
        diaSummary.GetPeptides(pepQValueThreshold));
    var sharedPeptides = venn.Intersection;

    var ddaPeptides = ddaSummary.GetData("Peptide");
    var ddaScans = ddaSummary.GetData("ScanNum");
    var diaPeptides = diaSummary.GetData("Peptide");
    var diaScans = diaSummary.GetData("ScanNum");

    Console.WriteLine("Peptide\tScanNum1\tScanNum2\tRt1\tRt2");
    foreach (var peptide in sharedPeptides)
    {
        var ddaRow = ddaPeptides.IndexOf(peptide);
        var diaRow = diaPeptides.IndexOf(peptide);

        var ddaScan = Convert.ToInt32(ddaScans[ddaRow]);
        var diaScan = Convert.ToInt32(diaScans[diaRow]);

        Console.WriteLine(
            "{0}\t{1}\t{2}\t{3}\t{4}",
            peptide.Replace("C+57.021", "C"),
            ddaScan,
            diaScan,
            ddaReader.RtFromScanNum(ddaScan),
            diaReader.RtFromScanNum(diaScan));
    }
}