/// <summary>
/// Reads the result file named by <c>_fileName</c> and builds one spectrum match for each distinct
/// peptide whose Q-value is not above <c>QValueThreshold</c>.
/// </summary>
/// <returns>
/// The accepted matches in file order. The list is empty when the parser returns null for the peptide column.
/// </returns>
public IList<SpectrumMatch> Read()
{
    var matches = new List<SpectrumMatch>();
    var parser = new TsvFileParser(_fileName);

    var chargeColumn = parser.GetData(PrecursorChargeHeader);
    var scanColumn = parser.GetData(ScanHeader);
    var peptideColumn = parser.GetData(TopDownPeptideHeader);
    if (peptideColumn == null)
    {
        // No peptide column: nothing to read.
        return matches;
    }

    var seenPeptides = new HashSet<string>();
    const double maxQValue = QValueThreshold;
    var qValueColumn = parser.GetData(QValueHeader);
    var aminoAcids = new AminoAcidSet();

    for (var row = 0; row < peptideColumn.Count; row++)
    {
        if (Convert.ToDouble(qValueColumn[row]) > maxQValue)
        {
            continue;
        }

        // Add returns false for a peptide that was already accepted, so only its first
        // passing occurrence produces a match.
        if (!seenPeptides.Add(peptideColumn[row]))
        {
            continue;
        }

        var scanNum = Convert.ToInt32(scanColumn[row]);
        var precursorCharge = Convert.ToInt32(chargeColumn[row]);
        var sequence = new Sequence(peptideColumn[row], aminoAcids);
        matches.Add(new SpectrumMatch(sequence, _lcms, scanNum, precursorCharge, _decoy));
    }

    return matches;
}
/// <summary>
/// Builds a Venn diagram from the peptides of two result files and prints its sets.
/// The test is ignored when either input file is missing.
/// </summary>
public void TestVennDiagram()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string result1Path = @"C:\cygwin\home\kims336\Data\QCShewQE\NoMod_NTT1.tsv";
    const string result2Path = @"C:\cygwin\home\kims336\Data\QCShewQE\Ic_NTT1_Test\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28_IcTda.tsv";

    // Check the inputs in order; the first missing file ends the test as ignored.
    foreach (var requiredPath in new[] { result1Path, result2Path })
    {
        if (!File.Exists(requiredPath))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredPath);
        }
    }

    const double pepQValueThreshold = 0.01;
    var firstResult = new TsvFileParser(result1Path);
    var secondResult = new TsvFileParser(result2Path);
    var firstPeptides = firstResult.GetPeptides(pepQValueThreshold);
    var secondPeptides = secondResult.GetPeptides(pepQValueThreshold);
    var venn = new VennDiagram<string>(firstPeptides, secondPeptides);
    var intersectionPeptides = venn.Intersection;

    // NOTE(review): these lines print the set objects themselves rather than their counts,
    // and the second line mixes Set1 with Set2Only - confirm this is the intended output.
    Console.WriteLine("{0} {1}", venn.Set1, venn.Set2);
    Console.WriteLine("{0} {1} {2}", venn.Set1, venn.Intersection, venn.Set2Only);
}
/// <summary>
/// Loads features from an ms1ft file into <c>_lcMsChargeMap</c>, keeping only those whose
/// likelihood ratio is not below <c>_minLikelihoodRatio</c>, then builds the mass-to-scan map.
/// </summary>
/// <param name="ms1FtFileName">Path of the tab-separated feature file.</param>
private void Read(string ms1FtFileName)
{
    var ftFileParser = new TsvFileParser(ms1FtFileName);

    // Small column readers; every column is fully parsed before any feature is registered.
    Func<string, int[]> readInts =
        header => ftFileParser.GetData(header).Select(s => Convert.ToInt32(s)).ToArray();
    Func<string, double[]> readDoubles =
        header => ftFileParser.GetData(header).Select(s => Convert.ToDouble(s)).ToArray();

    var monoMasses = readDoubles("MonoMass");
    var minScans = readInts("MinScan");
    var maxScans = readInts("MaxScan");
    var repScans = readInts("RepScan");
    var minCharges = readInts("MinCharge");
    var maxCharges = readInts("MaxCharge");
    var likelihoodRatios = readDoubles("LikelihoodRatio");

    var featureCountFiltered = 0;
    for (var row = 0; row < monoMasses.Length; row++)
    {
        var belowCutoff = likelihoodRatios[row] < _minLikelihoodRatio;
        if (belowCutoff)
        {
            continue;
        }

        featureCountFiltered++;
        _lcMsChargeMap.SetMatches(
            monoMasses[row],
            minScans[row],
            maxScans[row],
            repScans[row],
            minCharges[row],
            maxCharges[row]);
    }

    // NOTE: The DMS Analysis Manager looks for this statistic; do not change it
    Console.Write(@"{0}/{1} features loaded...", featureCountFiltered, monoMasses.Length);

    _lcMsChargeMap.CreateMassToScanNumMap();
}
/// <summary>
/// Checks that, for every row of a TMT10 MS-GF+ result file, the composition of the parsed peptide
/// plus H2O equals the composition parsed from the "Formula" column.
/// The test is ignored when the file is missing.
/// </summary>
public void TestReadingTmtResultFile()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string filePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSGFPlusResultTMT10.tsv";
    if (!File.Exists(filePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, filePath);
    }

    var parser = new TsvFileParser(filePath);
    var peptideColumn = parser.GetData("Peptide");
    var formulaColumn = parser.GetData("Formula");
    Assert.True(peptideColumn.Count == formulaColumn.Count);

    var parsedPeptides = peptideColumn.Select(Sequence.GetSequenceFromMsGfPlusPeptideStr).ToList();
    var parsedFormulae = formulaColumn.Select(Composition.Parse).ToList();
    Assert.True(parsedPeptides.Count == parsedFormulae.Count);

    for (var row = 0; row < parsedPeptides.Count; row++)
    {
        var fromPeptide = parsedPeptides[row].Composition + Composition.H2O;
        Assert.True(fromPeptide.Equals(parsedFormulae[row]));
    }
}
/// <summary>
/// Compares the peptides of two DIA replicate result files at a fixed Q-value threshold and prints
/// the sizes of the "first only", "shared" and "second only" sets, tab-separated.
/// </summary>
public void GenerateVennDiagrams()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // DIA
    const string dir = @"H:\Research\EDRN\Ic\DIA_Replicate";
    const string rep1 = dir + @"\EDRN_Serum_07_DIA_1_01_13Nov13_Samwise_13-07-28_IcTda.tsv";
    //const string rep2 = dir + @"\EDRN_Serum_07_DIA_1_02_13Nov13_Samwise_13-07-28_IcTda.tsv";
    //const string rep3 = dir + @"\EDRN_Serum_07_DIA_1_03_13Nov13_Samwise_13-07-28_IcTda.tsv";
    const string rep4 = dir + @"\EDRN_Serum_07_DIA_1_04_13Nov13_Samwise_13-07-28_IcTda.tsv";
    //const string rep5 = dir + @"\EDRN_Serum_07_DIA_1_05_18Nov13_Samwise_13-07-28_IcTda.tsv";

    // The two replicates being compared.
    var firstResult = new TsvFileParser(rep1);
    var secondResult = new TsvFileParser(rep4);

    const double pepQValueThreshold = 0.01;
    var firstPeptides = firstResult.GetPeptidesAboveQValueThreshold(pepQValueThreshold);
    var secondPeptides = secondResult.GetPeptidesAboveQValueThreshold(pepQValueThreshold);
    var venn = new VennDiagram<string>(firstPeptides, secondPeptides);

    var firstOnlyCount = venn.Set1Only.Count;
    var sharedCount = venn.Intersection.Count;
    var secondOnlyCount = venn.Set2Only.Count;
    Console.WriteLine("{0}\t{1}\t{2}", firstOnlyCount, sharedCount, secondOnlyCount);
}
/// <summary>
/// Reads sequence tags from a tab-separated tag file into <c>_scanToTags</c>.
/// Tags shorter than <c>_minTagLength</c> are skipped, and a scan stops accepting tags once its
/// list holds <c>_numTagsPerScan</c> entries.
/// </summary>
/// <param name="tagFilePath">Path of the tag file.</param>
private void Parse(string tagFilePath)
{
    var parser = new TsvFileParser(tagFilePath);
    var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
    var tagSequences = parser.GetData("SequenceTag").ToArray();
    var prefixFlags = parser.GetData("IsPrefix").Select(s => s.Equals("1")).ToArray();
    var flankingMasses = parser.GetData("FlankingMass").Select(s => Convert.ToDouble(s)).ToArray();

    for (var row = 0; row < parser.NumData; row++)
    {
        var tagSequence = tagSequences[row];
        if (tagSequence.Length < _minTagLength)
        {
            continue;
        }

        var scanNum = scanNums[row];
        var tag = new SequenceTag.SequenceTag(scanNum, tagSequence, prefixFlags[row], flankingMasses[row]);

        IList<SequenceTag.SequenceTag> tagsForScan;
        if (!_scanToTags.TryGetValue(scanNum, out tagsForScan))
        {
            // First tag seen for this scan.
            _scanToTags.Add(scanNum, new List<SequenceTag.SequenceTag> { tag });
            continue;
        }

        if (tagsForScan.Count < _numTagsPerScan)
        {
            tagsForScan.Add(tag);
        }
    }
}
/// <summary>
/// Checks that each peptide in the TMT10 MS-GF+ result file, once parsed and extended by H2O,
/// has the same composition as the value parsed from the "Formula" column of the same row.
/// The test is ignored when the file is not found in the default test-file folder.
/// </summary>
public void TestReadingTmtResultFile()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var filePath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, "MSGFPlusResultTMT10.tsv");
    if (!File.Exists(filePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, filePath);
    }

    var parser = new TsvFileParser(filePath);
    var peptideStrings = parser.GetData("Peptide");
    var formulaStrings = parser.GetData("Formula");
    Assert.True(peptideStrings.Count == formulaStrings.Count);

    var sequences = peptideStrings.Select(Sequence.GetSequenceFromMsGfPlusPeptideStr).ToList();
    var expectedCompositions = formulaStrings.Select(Composition.Parse).ToList();
    Assert.True(sequences.Count == expectedCompositions.Count);

    var row = 0;
    foreach (var sequence in sequences)
    {
        var actual = sequence.Composition + Composition.H2O;
        Assert.True(actual.Equals(expectedCompositions[row]));
        row++;
    }
}
/*
 * public static List<LcMsFeature> LoadProMexResult(string featureFilePath)
 * {
 *     return LoadProMexResult(0, featureFilePath);
 * }
 */

/// <summary>
/// Loads LC-MS features from a ProMex feature file, keeping those whose monoisotopic mass lies
/// within [<paramref name="minMass"/>, <paramref name="maxMass"/>].
/// </summary>
/// <param name="dataId">Value stored in each feature's <c>DataSetId</c>.</param>
/// <param name="featureFilePath">Path of the tab-separated feature file.</param>
/// <param name="run">Run used to normalize elution times by the elution time of its last LC scan.</param>
/// <param name="minMass">Smallest monoisotopic mass to keep.</param>
/// <param name="maxMass">Largest monoisotopic mass to keep.</param>
/// <returns>The features that pass the mass filter, in file order.</returns>
public static List<LcMsFeature> LoadProMexResult(int dataId, string featureFilePath, LcMsRun run, double minMass = 2000, double maxMass = 50000)
{
    var features = new List<LcMsFeature>();
    var reader = new TsvFileParser(featureFilePath);

    var featureIds = reader.GetData("FeatureID");
    var minScans = reader.GetData("MinScan");
    var maxScans = reader.GetData("MaxScan");
    var abundances = reader.GetData("Abundance");
    var minCharges = reader.GetData("MinCharge");
    var maxCharges = reader.GetData("MaxCharge");
    var monoMasses = reader.GetData("MonoMass");
    var minElutionTimes = reader.GetData("MinElutionTime");
    var maxElutionTimes = reader.GetData("MaxElutionTime");

    // These columns are optional: a null column falls back to a derived or default value below.
    var repCharges = reader.GetData("RepCharge");
    var repScans = reader.GetData("RepScan");
    var repMzs = reader.GetData("RepMz");
    var scores = reader.GetData("LikelihoodRatio");

    for (var row = 0; row < reader.NumData; row++)
    {
        var abundance = double.Parse(abundances[row]);
        var repMass = double.Parse(monoMasses[row]);
        var inMassRange = !(repMass < minMass || repMass > maxMass);
        if (!inMassRange)
        {
            continue;
        }

        var minCharge = int.Parse(minCharges[row]);
        var maxCharge = int.Parse(maxCharges[row]);
        var minScan = int.Parse(minScans[row]);
        var maxScan = int.Parse(maxScans[row]);
        var featureId = int.Parse(featureIds[row]);

        // Representative charge: taken from the file, else the rounded midpoint of the charge range.
        int repCharge;
        if (repCharges != null)
        {
            repCharge = int.Parse(repCharges[row]);
        }
        else
        {
            repCharge = (int)Math.Round(0.5 * (minCharge + maxCharge));
        }

        // Representative m/z: taken from the file, else computed from mass and charge.
        double repMz;
        if (repMzs != null)
        {
            repMz = double.Parse(repMzs[row]);
        }
        else
        {
            repMz = (repMass / repCharge) + Constants.Proton;
        }

        // Representative scan: taken from the file, else the first scan of the feature.
        int repScanNum;
        if (repScans != null)
        {
            repScanNum = int.Parse(repScans[row]);
        }
        else
        {
            repScanNum = minScan;
        }

        double score = 0;
        if (scores != null)
        {
            score = double.Parse(scores[row]);
        }

        var minEt = double.Parse(minElutionTimes[row]);
        var maxEt = double.Parse(maxElutionTimes[row]);

        // Normalized elution times: each bound divided by the elution time of the run's last LC scan.
        var minNet = minEt / run.GetElutionTime(run.MaxLcScan);
        var maxNet = maxEt / run.GetElutionTime(run.MaxLcScan);

        features.Add(new LcMsFeature(repMass, repCharge, repMz, repScanNum, abundance, minCharge, maxCharge, minScan, maxScan, minEt, maxEt, minNet, maxNet)
        {
            FeatureId = featureId,
            DataSetId = dataId,
            Score = score,
        });
    }

    return features;
}
/// <summary>
/// For every identification with a Q-value of at most 0.01 and a non-empty modification list,
/// prints the scan number, the sequence mass (including H2O), the summed modification mass,
/// the matched-fragment count, the sequence and the modification string.
/// The test is ignored when the result file is missing.
/// </summary>
public void TestPredictPTMfromMs1ft()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultFilePath = @"\\protoapps\UserData\Jungkap\FeatureFinding\ProMex_v1.1\test\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var parser = new TsvFileParser(resultFilePath);
    var sequences = parser.GetData("Sequence");
    var modifications = parser.GetData("Modifications");
    var scanNums = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
    var qValues = parser.GetData("QValue").Select(s => Convert.ToDouble(s)).ToArray();
    var matchedFragments = parser.GetData("#MatchedFragments");
    var aaSet = new AminoAcidSet();

    // One (scan, sequence mass, modification mass) entry per printed row.
    var ptmList = new List<Tuple<int, double, double>>();

    for (var row = 0; row < parser.NumData; row++)
    {
        if (qValues[row] > 0.01)
        {
            continue;
        }

        var sequence = new Sequence(sequences[row], aaSet);
        var sequenceComposition = sequence.Composition + Composition.H2O;

        var modsText = modifications[row];
        if (modsText.Length == 0)
        {
            continue;
        }

        // Sum the compositions of all listed modifications; the name is the first
        // whitespace-separated token of each comma-separated entry.
        var modComposition = Composition.Zero;
        foreach (var entry in modsText.Split(','))
        {
            if (entry.Length == 0)
            {
                continue;
            }

            var modName = entry.Split()[0];
            modComposition += Modification.Get(modName).Composition;
        }

        Console.WriteLine(
            "{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
            scanNums[row],
            sequenceComposition.Mass,
            modComposition.Mass,
            matchedFragments[row],
            sequences[row],
            modsText);

        ptmList.Add(new Tuple<int, double, double>(scanNums[row], sequenceComposition.Mass, modComposition.Mass));
    }
}
/// <summary>
/// Scores every peptide of an MS-GF+ result file against its spectrum with a rank scorer, and
/// does the same for a decoy built by reversing and mutating the peptide. Writes one
/// "target score / decoy score" line per peptide to a tab-separated output file.
/// The test is ignored when the raw file or the result file is missing.
/// </summary>
public void DiaRankScore()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string dataFile = @"\\protoapps\UserData\Wilkins\BottomUp\HCD_QCShew\raw\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
    const string tsvFile = @"\\protoapps\UserData\Wilkins\BottomUp\HCD_QCShew\tsv\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.tsv";

    foreach (var requiredFile in new[] { dataFile, tsvFile })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    var parser = new TsvFileParser(tsvFile);
    var peptideColumn = parser.GetData("Peptide");
    var chargeColumn = parser.GetData("Charge");
    var scanColumn = parser.GetData("ScanNum");

    var lcms = InMemoryLcMsRun.GetLcMsRun(dataFile, 0, 0);
    var rankScorer = new DiaRankScore(@"C:\Users\wilk011\Documents\DataFiles\TestFolder\HCD_QExactive_Tryp.txt");

    using (var outFile = new StreamWriter(@"C:\Users\wilk011\Documents\DataFiles\TestFolder\HCD_QCShew_Score_2.txt"))
    {
        outFile.WriteLine("Target\tDecoy");

        for (var row = 0; row < peptideColumn.Count; row++)
        {
            var peptideText = peptideColumn[row];
            var charge = Convert.ToInt32(chargeColumn[row]);
            var scan = Convert.ToInt32(scanColumn[row]);

            var target = Sequence.GetSequenceFromMsGfPlusPeptideStr(peptideText);

            // Decoy: parse the peptide again, reverse it, render it back to text,
            // mutate half of the target length, then re-parse.
            var decoy = Sequence.GetSequenceFromMsGfPlusPeptideStr(peptideText);
            decoy.Reverse();
            var decoyText = decoy.Aggregate("", (soFar, residue) => soFar + residue);
            decoyText = SimpleStringProcessing.Mutate(decoyText, target.Count / 2);
            decoy = Sequence.GetSequenceFromMsGfPlusPeptideStr(decoyText);

            var targetScore = rankScorer.GetScore(target, charge, scan, lcms);
            var decoyScore = rankScorer.GetScore(decoy, charge, scan, lcms);
            outFile.WriteLine("{0}\t{1}", targetScore, decoyScore);
        }
    }
}
/// <summary>
/// For every "pbf" dataset in the CompRef data folder, merges the MSPathFinder result file and the
/// sequence-tag match file into one tab-separated file under "MsPathFinderMerged".
/// </summary>
/// <remarks>
/// The output writer is wrapped in a <c>using</c> block so the file is flushed and closed even
/// when parsing or merging one of the inputs throws.
/// </remarks>
public void TestTempCompRefLcMsFeatureAlign()
{
    const string dataFolder = @"D:\MassSpecFiles\CompRef";
    const string fastaFilePath = @"D:\MassSpecFiles\CompRef\db\ID_003278_4B4B3CB1.fasta";

    var fastaDb = new FastaDatabase(fastaFilePath);
    fastaDb.Read();

    // Dataset names are the extension-less names of the files ending in "pbf", sorted.
    var fileEntries = Directory.GetFiles(dataFolder);
    var dataset = (from fileName in fileEntries
                   where fileName.EndsWith("pbf")
                   select Path.GetFileNameWithoutExtension(fileName)).ToList();
    dataset.Sort();

    foreach (var datasetName in dataset)
    {
        var mergedPath = string.Format(@"D:\MassSpecFiles\CompRef\MsPathFinderMerged\{0}_IcTda.tsv", datasetName);
        using (var writer = new StreamWriter(mergedPath))
        {
            // Header row; the columns match what OutputMergedResult writes per row.
            writer.Write("Scan\tSequence\tModifications\tMass\tProteinName\tProteinDesc\tStart\tEnd\t#MatchedFragments\tQValue\n");

            var path1 = string.Format(@"D:\MassSpecFiles\CompRef\MsPathFinder\{0}_IcTda.tsv", datasetName);
            var parser1 = new TsvFileParser(path1);
            OutputMergedResult(writer, parser1, fastaDb);

            var path2 = string.Format(@"D:\MassSpecFiles\CompRef\seqtag\{0}_tagmatch.tsv", datasetName);
            var parser2 = new TsvFileParser(path2);
            OutputMergedResult(writer, parser2, fastaDb);
        }
    }
}
/// <summary>
/// Compares the peptides reported in an IPA summary file with the target sequences found in the
/// IC "*DIA*IcTarget.tsv" result files. Prints each IPA peptide that is absent from the IC
/// results, followed by the counts of shared and IPA-only peptides.
/// The test is ignored when the result folder or the IPA summary file is missing.
/// </summary>
public void CompareIpaIc()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string resultDir = @"D:\Research\Data\UW\QExactive\Ic_NTT2_03";
    if (!Directory.Exists(resultDir))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, resultDir);
    }

    // Union of the "Sequence" column over all matching IC result files.
    var targetPeptides = new HashSet<string>();
    foreach (var icResultFilePath in Directory.GetFiles(resultDir, "*DIA*IcTarget.tsv"))
    {
        var icParser = new TsvFileParser(icResultFilePath);
        foreach (var peptide in icParser.GetData("Sequence"))
        {
            targetPeptides.Add(peptide);
        }
    }

    const string ipaResultPath = @"D:\Research\Data\UW\QExactive\DIA_All_Summary.tsv";
    if (!File.Exists(ipaResultPath))
    {
        // Report the missing file's path (this message used to repeat the method name instead).
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, ipaResultPath);
    }

    var parser = new TsvFileParser(ipaResultPath);

    // Drop the "+57.021" annotation on C so IPA peptides compare equal to the IC sequences.
    var ipaPeptides = parser.GetPeptides(0.005).Select(p => p.Replace("C+57.021", "C"));

    var ipaOnly = 0;
    var both = 0;
    foreach (var ipaPeptide in ipaPeptides)
    {
        if (targetPeptides.Contains(ipaPeptide))
        {
            ++both;
        }
        else
        {
            ++ipaOnly;
            Console.WriteLine(ipaPeptide);
        }
    }

    Console.WriteLine("Both: {0}, IpaOnly: {1}, Sum: {2}", both, ipaOnly, both + ipaOnly);
}
/// <summary>
/// For every peptide identified in both the DDA and the DIA Fusion summary files, prints the scan
/// number of its first occurrence in each file and the retention time of that scan in the
/// corresponding raw file.
/// </summary>
public void CompareRtFusion()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Fusion
    const string ddaResultPath = @"D:\Research\Data\UW\Fusion\DDA_Summary.tsv";
    const string diaResultPath = @"D:\Research\Data\UW\Fusion\DIA_Summary.tsv";

    const string specFileDda = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
    var ddaReader = new XCaliburReader(specFileDda);
    const string specFileDia = @"D:\Research\Data\UW\Fusion\WT_D_DIA_130412091220.raw";
    var diaReader = new XCaliburReader(specFileDia);

    var ddaResult = new TsvFileParser(ddaResultPath);
    var diaResult = new TsvFileParser(diaResultPath);

    const double pepQValueThreshold = 0.01;
    var ddaPeptideSet = ddaResult.GetPeptides(pepQValueThreshold);
    var diaPeptideSet = diaResult.GetPeptides(pepQValueThreshold);
    var sharedPeptides = new VennDiagram<string>(ddaPeptideSet, diaPeptideSet).Intersection;

    var ddaPeptides = ddaResult.GetData("Peptide");
    var ddaScanNums = ddaResult.GetData("ScanNum");
    var diaPeptides = diaResult.GetData("Peptide");
    var diaScanNums = diaResult.GetData("ScanNum");

    Console.WriteLine("Peptide\tScanNum1\tScanNum2\tRt1\tRt2");
    foreach (var peptide in sharedPeptides)
    {
        // IndexOf picks the first row in which the peptide appears in each file.
        var ddaRow = ddaPeptides.IndexOf(peptide);
        var diaRow = diaPeptides.IndexOf(peptide);
        var ddaScan = Convert.ToInt32(ddaScanNums[ddaRow]);
        var diaScan = Convert.ToInt32(diaScanNums[diaRow]);

        Console.WriteLine(
            "{0}\t{1}\t{2}\t{3}\t{4}",
            peptide.Replace("C+57.021", "C"),
            ddaScan,
            diaScan,
            ddaReader.RtFromScanNum(ddaScan),
            diaReader.RtFromScanNum(diaScan));
    }
}
/// <summary>
/// Prints, for every MS-GF+ match at a PSM FDR of 0.01, the scan number, peptide, spectral E-value
/// and the IC score of the same scan/peptide pair, or -1 when the IC results contain no such pair.
/// The test is ignored when either input file is missing.
/// </summary>
public void TestInitialScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string icResultPath = @"C:\cygwin\home\kims336\Data\QCShewQE\Ic_NTT2_03_NoMod_NoRescoring\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28_IcTarget.tsv";
    if (!File.Exists(icResultPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, icResultPath);
    }

    var icParser = new TsvFileParser(icResultPath);
    var icScans = icParser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
    var icPeptides = icParser.GetData("Sequence");
    var icScores = icParser.GetData("Score").Select(s => Convert.ToInt32(s)).ToArray();

    // IC score keyed by "scan:peptide". Dictionary.Add throws if the same pair occurs twice.
    var icScoreByKey = new Dictionary<string, int>();
    for (var row = 0; row < icParser.NumData; row++)
    {
        var key = icScans[row] + ":" + icPeptides[row];
        icScoreByKey.Add(key, icScores[row]);
    }

    const string msgfPlusResultPath = @"C:\cygwin\home\kims336\Data\QCShewQE\NoMod.tsv";
    if (!File.Exists(msgfPlusResultPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, msgfPlusResultPath);
    }

    var msgfPlusResults = new MsGfResults(msgfPlusResultPath);
    var matches = msgfPlusResults.GetMatchesAtPsmFdr(0.01);

    Console.WriteLine("ScanNum\tPeptide\tSpecEValue\tIcScore");
    foreach (var match in matches)
    {
        var scanNum = match.ScanNum;
        var peptide = match.Peptide;
        var specEValue = match.SpecEValue;

        int found;
        var icScore = icScoreByKey.TryGetValue(scanNum + ":" + peptide, out found) ? found : -1;

        Console.WriteLine("{0}\t{1}\t{2}\t{3}", scanNum, peptide, specEValue, icScore);
    }
}
/// <summary>
/// Reads protein-spectrum matches from an MS-GF+ tab-separated result file.
/// </summary>
/// <remarks>
/// A row is skipped when its scan number equals that of the row read immediately before it, or when
/// its Q-value exceeds <c>FdrCutoff</c>. The "previous scan" marker is updated before the Q-value
/// check, so a row rejected by the cutoff still suppresses the duplicates that follow it.
/// Column lookups are done once up front rather than once per column per row.
/// </remarks>
/// <param name="msgfResultPath">Path of the MS-GF+ result file.</param>
/// <param name="maxPrsm">Reading stops as soon as the result list holds at least this many matches.</param>
/// <returns>The accepted matches in file order.</returns>
public List<ProteinSpectrumMatch> ReadMsGfPlusResult(string msgfResultPath, int maxPrsm)
{
    var parser = new TsvFileParser(msgfResultPath);

    // Loop-invariant: fetch each column once instead of on every iteration.
    var peptideColumn = parser.GetData("Peptide");
    var scanColumn = parser.GetData("Scan");
    var precursorMzColumn = parser.GetData("PrecursorMZ");
    var proteinColumn = parser.GetData("Protein");
    var scoreColumn = parser.GetData("MSGFScore");
    var chargeColumn = parser.GetData("Charge");
    var qValueColumn = parser.GetData("QValue");

    var prsmList = new List<ProteinSpectrumMatch>();
    var prevScanNum = -1;

    for (var i = 0; i < parser.NumData; i++)
    {
        var sequence = peptideColumn[i];
        var scanNum = int.Parse(scanColumn[i]);

        if (prevScanNum == scanNum)
        {
            continue;
        }

        prevScanNum = scanNum;

        var mz = double.Parse(precursorMzColumn[i]);
        var protName = proteinColumn[i];
        var protDesc = "";
        var score = double.Parse(scoreColumn[i]);
        var charge = int.Parse(chargeColumn[i]);

        var seq = Sequence.GetSequenceFromMsGfPlusPeptideStr(sequence);
        var sequenceText = GetSequenceText(seq);

        // Mass computed from the precursor m/z and charge.
        var mass = (mz - Constants.Proton) * charge;

        // No residue positions are read from this file; both are passed as 0.
        var firstResId = 0;
        var lastResId = 0;

        var fdr = double.Parse(qValueColumn[i]);
        if (fdr > FdrCutoff)
        {
            continue;
        }

        var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsGfPlus)
        {
            SequenceText = sequenceText,
        };
        prsmList.Add(prsm);

        if (prsmList.Count >= maxPrsm)
        {
            break;
        }
    }

    return prsmList;
}
/// <summary>
/// For every identification with a Q-value of at most 0.01 in <paramref name="resultFile"/>, finds the
/// longest sequence in the matching spectrum of <paramref name="rawFile"/> and returns one
/// comma-separated line per identification.
/// </summary>
/// <remarks>
/// As before, a row whose scan number also appears earlier in the file reads its values from the
/// first row with that scan number. That first row is now remembered in a dictionary instead of
/// being searched for linearly on every iteration, and the output is built with a StringBuilder
/// instead of repeated string concatenation.
/// </remarks>
/// <param name="rawFile">Path of the spectrum file.</param>
/// <param name="resultFile">Path of the tab-separated identification file.</param>
/// <param name="methodName">Name of the calling test; used only in the warning messages.</param>
/// <returns>
/// The concatenated result lines, or a single newline when either input file is missing.
/// </returns>
public string ProcessFile(string rawFile, string resultFile, string methodName)
{
    if (!File.Exists(rawFile))
    {
        Console.WriteLine(@"Warning: Skipping test {0} since file not found: {1}", methodName, rawFile);
        return "\n";
    }

    if (!File.Exists(resultFile))
    {
        Console.WriteLine(@"Warning: Skipping test {0} since file not found: {1}", methodName, resultFile);
        return "\n";
    }

    var tsvParser = new TsvFileParser(resultFile);
    var tsvData = tsvParser.GetAllData();
    var ms2ScanNumbers = tsvData["Scan"];

    var run = PbfLcMsRun.GetLcMsRun(rawFile, 0, 0);

    // Scan number -> index of the first row carrying it.
    var firstRowByScan = new System.Collections.Generic.Dictionary<int, int>();
    var resultLines = new System.Text.StringBuilder();

    for (var i = 0; i < ms2ScanNumbers.Count; i++)
    {
        var scanNum = Int32.Parse(ms2ScanNumbers[i]);
        var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;

        int tsvIndex;
        if (!firstRowByScan.TryGetValue(scanNum, out tsvIndex))
        {
            // Not seen in any earlier row, so this row is the first with this scan number.
            tsvIndex = i;
            firstRowByScan.Add(scanNum, i);
        }

        var qValue = Double.Parse(tsvData["QValue"][tsvIndex]);
        if (qValue > 0.01)
        {
            continue;
        }

        var seqStr = tsvData["Sequence"][tsvIndex].Trim();
        var seqMod = tsvData["Modifications"][tsvIndex].Trim();
        var matchedFrags = tsvData["#MatchedFragments"][tsvIndex].Trim();

        var aaSet = new AminoAcidSet();
        var sequence = Sequence.CreateSequence(seqStr, seqMod, aaSet);
        var tol = new Tolerance(10);
        var sequenceFinder = new SequenceTagIndexFinder(tol, 1, 10);
        var results = sequenceFinder.GetLongestSequence(spectrum, sequence);

        resultLines.AppendFormat(
            "{0},{1},{2},{3},{4},{5},{6},{7},{8},\n",
            scanNum,
            matchedFrags,
            seqStr,
            results.Item1,
            results.Item2,
            results.Item3,
            results.Item4,
            results.Item5,
            results.Item6);
    }

    return resultLines.ToString();
}
/// <summary>
/// Reads an MSPathFinder result file. Every row is appended to <c>_idList</c>, and the first row
/// seen for each scan number is recorded in <c>_scanNumToPrSm</c>.
/// </summary>
/// <param name="fileName">Path of the tab-separated result file.</param>
/// <returns>
/// False when any "Pre" or "Post" value is not exactly one character long (nothing is stored in
/// that case); true otherwise.
/// </returns>
private bool Parse(string fileName)
{
    var parser = new TsvFileParser(fileName);

    // Typed column readers.
    Func<string, int[]> intColumn =
        header => parser.GetData(header).Select(s => Convert.ToInt32(s)).ToArray();
    Func<string, double[]> doubleColumn =
        header => parser.GetData(header).Select(s => Convert.ToDouble(s)).ToArray();
    // Keeps only single-character values, so a short result signals a malformed column.
    Func<string, char[]> singleCharColumn =
        header => parser.GetData(header).Where(s => s.Length == 1).Select(s => s[0]).ToArray();

    var scan = intColumn("Scan");

    var pre = singleCharColumn("Pre");
    if (pre.Length != parser.NumData)
    {
        return false;
    }

    var sequence = parser.GetData("Sequence").ToArray();

    var post = singleCharColumn("Post");
    if (post.Length != parser.NumData)
    {
        return false;
    }

    var mod = parser.GetData("Modifications").ToArray();
    var composition = parser.GetData("Composition").Select(Composition.Parse).ToArray();
    var proteinName = parser.GetData("ProteinName").ToArray();
    var proteinDesc = parser.GetData("ProteinDesc").ToArray();
    var proteinLength = intColumn("ProteinLength");
    var start = intColumn("Start");
    var end = intColumn("End");
    var charge = intColumn("Charge");
    var mostAbundantIsotopeMz = doubleColumn("MostAbundantIsotopeMz");
    var mass = doubleColumn("Mass");
    var numMatchedFragment = intColumn("#MatchedFragments");
    var qValue = doubleColumn("QValue");
    var pepQValue = doubleColumn("PepQValue");

    for (var row = 0; row < parser.NumData; row++)
    {
        var id = new MsPathFinderId(
            scan[row], pre[row], sequence[row], post[row], mod[row],
            composition[row], proteinName[row], proteinDesc[row], proteinLength[row],
            start[row], end[row], charge[row], mostAbundantIsotopeMz[row], mass[row],
            numMatchedFragment[row], qValue[row], pepQValue[row]);
        _idList.Add(id);

        // Only the first identification of a scan is kept in the per-scan lookup.
        var alreadyMapped = _scanNumToPrSm.ContainsKey(scan[row]);
        if (!alreadyMapped)
        {
            _scanNumToPrSm.Add(scan[row], id);
        }
    }

    return true;
}
/// <summary>
/// Reads protein-spectrum matches from an MS-Align result table, keeping rows whose FDR does not
/// exceed <c>FdrCutoff</c>.
/// </summary>
/// <remarks>
/// Column lookups are done once up front rather than once per column per row.
/// </remarks>
/// <param name="msAlignResultTablePath">Path of the tab-separated MS-Align result table.</param>
/// <param name="maxPrsm">Reading stops as soon as the result list holds at least this many matches.</param>
/// <returns>The accepted matches in file order.</returns>
public List<ProteinSpectrumMatch> ReadMsAlignResult(string msAlignResultTablePath, int maxPrsm)
{
    var parser = new TsvFileParser(msAlignResultTablePath);

    // Loop-invariant: fetch each column once instead of on every iteration.
    var peptideColumn = parser.GetData("Peptide");
    var scanColumn = parser.GetData("Scan(s)");
    var massColumn = parser.GetData("Precursor_mass");
    var proteinColumn = parser.GetData("Protein_name");
    var firstResidueColumn = parser.GetData("First_residue");
    var lastResidueColumn = parser.GetData("Last_residue");
    var matchedIonsColumn = parser.GetData("#matched_fragment_ions");
    var chargeColumn = parser.GetData("Charge");
    var eValueColumn = parser.GetData("E-value");
    var fdrColumn = parser.GetData("FDR");

    var prsmList = new List<ProteinSpectrumMatch>();

    for (var i = 0; i < parser.NumData; i++)
    {
        var sequence = peptideColumn[i];
        var scanNum = int.Parse(scanColumn[i]);
        var mass = double.Parse(massColumn[i]);

        // The protein field is split at its first space into name and description;
        // without a space, both receive the whole field.
        var protNameDesc = proteinColumn[i];
        var k = protNameDesc.IndexOf(' ');
        var protName = (k < 0) ? protNameDesc : protNameDesc.Substring(0, k);
        var protDesc = (k < 0) ? protNameDesc : protNameDesc.Substring(k + 1);

        var firstResId = int.Parse(firstResidueColumn[i]);
        var lastResId = int.Parse(lastResidueColumn[i]);

        // The matched fragment ion count is used as the score.
        var score = double.Parse(matchedIonsColumn[i]);
        var sequenceText = peptideColumn[i];
        var charge = int.Parse(chargeColumn[i]);
        var evalue = double.Parse(eValueColumn[i]);

        var fdr = double.Parse(fdrColumn[i]);
        if (fdr > FdrCutoff)
        {
            continue;
        }

        var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsAlign)
        {
            SequenceText = sequenceText,
            SpectralEvalue = evalue,
        };
        prsmList.Add(prsm);

        if (prsmList.Count >= maxPrsm)
        {
            break;
        }
    }

    return prsmList;
}
/// <summary>
/// Groups the MS2 scan numbers of a result file by composition, reading rows until the first one
/// whose Q-value exceeds 0.01, and prints the number of distinct compositions.
/// The test is ignored when the result file is missing.
/// </summary>
public void TestClusterCentricSearch()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string pfResultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V4_JP_Len500\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
    if (!File.Exists(pfResultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pfResultFilePath);
    }

    var tsvReader = new TsvFileParser(pfResultFilePath);
    var ms2Scans = tsvReader.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
    var compositions = tsvReader.GetData("Composition").ToArray();
    var qValues = tsvReader.GetData("QValue").Select(s => Convert.ToDouble(s)).ToArray();

    var scansByComposition = new Dictionary<string, IList<int>>();
    for (var row = 0; row < qValues.Length; row++)
    {
        // Stops at the first row above the threshold rather than skipping it.
        // NOTE(review): presumably the file is sorted by Q-value - confirm.
        if (qValues[row] > 0.01)
        {
            break;
        }

        var composition = compositions[row];
        var scan = ms2Scans[row];

        IList<int> scansSoFar;
        if (!scansByComposition.TryGetValue(composition, out scansSoFar))
        {
            scansSoFar = new List<int>();
            scansByComposition.Add(composition, scansSoFar);
        }

        scansSoFar.Add(scan);
    }

    Console.Write("NumCompositions: {0}", scansByComposition.Keys.Count);
}
/// <summary>
/// Loads features from a comma-separated isos file into <c>_lcMsMatchMap</c>, then builds the
/// sequence-mass-to-MS2-scan map over the mass range of the features that were kept.
/// </summary>
/// <remarks>
/// A feature is dropped when its charge is 1 or less, or when a "fit" column exists and its fit
/// value is above <c>_fitScoreThreshold</c>. Each kept feature is registered for the range from
/// <c>GetPrevScanNum(scan, 1)</c> to <c>GetNextScanNum(scan, 1)</c>.
/// </remarks>
/// <param name="isosFileName">Path of the isos file.</param>
private void Read(string isosFileName)
{
    var icrToolsparser = new TsvFileParser(isosFileName, ',');
    var monoMasses = icrToolsparser.GetData("monoisotopic_mw").Select(s => Convert.ToDouble(s)).ToArray();
    var scans = icrToolsparser.GetData("scan_num").Select(s => Convert.ToInt32(s)).ToArray();
    var charges = icrToolsparser.GetData("charge").Select(s => Convert.ToInt32(s)).ToArray();

    // The fit column is optional; without it no fit filtering is applied.
    double[] fitValues = null;
    if (icrToolsparser.GetData("fit") != null)
    {
        fitValues = icrToolsparser.GetData("fit").Select(s => Convert.ToDouble(s)).ToArray();
    }

    var featureCountFiltered = 0;
    var minMass = double.MaxValue;
    var maxMass = 0.0;

    for (var row = 0; row < monoMasses.Length; row++)
    {
        var poorFit = fitValues != null && fitValues[row] > _fitScoreThreshold;
        if (poorFit || charges[row] <= 1)
        {
            continue;
        }

        featureCountFiltered++;

        var scan = scans[row];
        var monoMass = monoMasses[row];

        // Track the mass range of the kept features.
        if (monoMass < minMass)
        {
            minMass = monoMass;
        }

        if (monoMass > maxMass)
        {
            maxMass = monoMass;
        }

        var minScan = _run.GetPrevScanNum(scan, 1);
        var maxScan = _run.GetNextScanNum(scan, 1);
        _lcMsMatchMap.SetMatches(monoMass, minScan, maxScan);
    }

    Console.Write(@"{0}/{1} features loaded...", featureCountFiltered, monoMasses.Length);

    _lcMsMatchMap.CreateSequenceMassToMs2ScansMap(_run, _massTolerance, minMass, maxMass);
}
/// <summary>
/// Compares the peptides of the UMC-based PE-MMR result with those of the IPA result at a fixed
/// Q-value threshold. Prints the total/shared/total counts, then the exclusive/shared/exclusive
/// counts, then every peptide found only in the IPA result.
/// </summary>
/// <remarks>
/// The final loop previously fetched the "Peptide" column of the second result on every iteration
/// into a local that was never used; that dead lookup has been removed.
/// </remarks>
public void GenerateVennDiagramsPeMmr()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // No PE-MMR
    //const string noPeMmr = @"D:\Research\Data\PEMMR\iTRAQ_N33T34_10ug_100cm_300min_C2_061213.tsv";

    // PE-MMR Scan based FDR
    //const string scanBasedPeMmr = @"D:\Research\Data\PEMMR\NewSpectra\iTRAQ_N33T34_10ug_100cm_300min_C2_061213_MX_PEMMR_UMCID_ScanFDR.tsv";

    // UMC based FDR
    const string umcBasedPeMmr = @"D:\Research\Data\PEMMR\NewSpectra\iTRAQ_N33T34_10ug_100cm_300min_C2_061213_MX_PEMMR_UMCID_UMCFDR.tsv";

    // IPA
    const string ipa = @"D:\Research\Data\PEMMR\Ox\IPA_Summary_TargetOnly.tsv";

    const string resultPath1 = umcBasedPeMmr;
    const string resultPath2 = ipa;

    var result1 = new TsvFileParser(resultPath1);
    var result2 = new TsvFileParser(resultPath2);

    const double pepQValueThreshold = 0.01;
    var vennDiagram = new VennDiagram<string>(result1.GetPeptides(pepQValueThreshold), result2.GetPeptides(pepQValueThreshold));

    // Totals per result: exclusive peptides plus the shared ones.
    Console.WriteLine("{0}\t{1}\t{2}",
        vennDiagram.Set1Only.Count + vennDiagram.Intersection.Count,
        vennDiagram.Intersection.Count,
        vennDiagram.Set2Only.Count + vennDiagram.Intersection.Count);

    // Exclusive counts only.
    Console.WriteLine("{0}\t{1}\t{2}",
        vennDiagram.Set1Only.Count,
        vennDiagram.Intersection.Count,
        vennDiagram.Set2Only.Count);

    foreach (var peptide in vennDiagram.Set2Only)
    {
        Console.WriteLine(peptide);
    }
}
/// <summary>
/// Checks that, for every row of a rescored IC result file, the composition of the sequence plus
/// H2O plus all listed modifications equals the value in the "Composition" column.
/// The test is ignored when the result file is missing.
/// </summary>
public void ValidateIcResultsWithModifications()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1_Rescored.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var parser = new TsvFileParser(resultFilePath);
    var sequences = parser.GetData("Sequence");
    var modifications = parser.GetData("Modifications");
    var expectedCompositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
    var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
    var aaSet = new AminoAcidSet();

    for (var row = 0; row < parser.NumData; row++)
    {
        var sequenceComposition = aaSet.GetComposition(sequences[row]) + Composition.H2O;

        // Drop the first and last character of the field before splitting on commas.
        // NOTE(review): presumably the list is wrapped in a pair of delimiters - confirm.
        var rawMods = modifications[row];
        var modList = rawMods.Substring(1, rawMods.Length - 2);

        var modComposition = Composition.Zero;
        foreach (var entry in modList.Split(',').Where(m => m.Length != 0))
        {
            // The modification name is the first whitespace-separated token of the entry.
            var modName = entry.Split()[0];
            modComposition += Modification.Get(modName).Composition;
        }

        var combined = sequenceComposition + modComposition;
        Assert.True(combined.Equals(expectedCompositions[row]));
    }
}
/// <summary>
/// Scores the first identification of a result file against its spectrum and prints the sequence
/// length followed by "scan:score". Returns after printing a warning when either input file
/// is missing.
/// </summary>
public void TestScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    var rawFile = @"\\protoapps\UserData\Jungkap\Joshua\testData\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    var resultFile = @"\\protoapps\UserData\Jungkap\Joshua\IdResult\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

    foreach (var requiredFile in new[] { rawFile, resultFile })
    {
        if (!File.Exists(requiredFile))
        {
            Console.WriteLine(@"Warning: Skipping test {0} since file not found: {1}", methodName, requiredFile);
            return;
        }
    }

    var tsvParser = new TsvFileParser(resultFile);
    var tsvData = tsvParser.GetAllData();
    var ms2ScanNumbers = tsvData["Scan"];

    var run = PbfLcMsRun.GetLcMsRun(rawFile, 0, 0);

    // Only the first row of the result file is scored.
    var scanNum = Int32.Parse(ms2ScanNumbers[0]);
    var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;

    var rowIndex = ms2ScanNumbers.FindIndex(text => Int32.Parse(text) == scanNum);
    var seqStr = tsvData["Sequence"].ElementAt(rowIndex).Trim();
    var seqMod = tsvData["Modifications"].ElementAt(rowIndex).Trim();

    var aaSet = new AminoAcidSet();
    var sequence = Sequence.CreateSequence(seqStr, seqMod, aaSet);
    Console.WriteLine(sequence.Count);

    var score = GetScoreTest(sequence, spectrum);
    Console.WriteLine("{0}:{1}", scanNum, score);
}
/// <summary>
/// Writes every row of <paramref name="parser"/> to <paramref name="writer"/> as one tab-separated
/// line: scan, sequence, modifications, mass, protein name, protein description, start, end,
/// score and Q-value.
/// </summary>
/// <remarks>
/// The score comes from the "#MatchedFragments" column, or from "Score" when that column is absent.
/// The Q-value is written as "0" when there is no "QValue" column. The protein description is
/// looked up in <paramref name="fastaDb"/> by protein name. Column lookups are done once up front
/// rather than once per column per row.
/// </remarks>
/// <param name="writer">Destination of the merged rows.</param>
/// <param name="parser">Parsed result file to copy rows from.</param>
/// <param name="fastaDb">Database used to resolve protein descriptions.</param>
private void OutputMergedResult(TextWriter writer, TsvFileParser parser, FastaDatabase fastaDb)
{
    var scoreColumn = parser.GetData("#MatchedFragments") ?? parser.GetData("Score");
    var qValColumn = parser.GetData("QValue");

    // Loop-invariant: fetch each column once instead of on every iteration.
    var sequenceColumn = parser.GetData("Sequence");
    var scanColumn = parser.GetData("Scan");
    var massColumn = parser.GetData("Mass");
    var proteinNameColumn = parser.GetData("ProteinName");
    var startColumn = parser.GetData("Start");
    var endColumn = parser.GetData("End");
    var modColumn = parser.GetData("Modifications");

    for (var i = 0; i < parser.NumData; i++)
    {
        var sequence = sequenceColumn[i];
        var scanNum = int.Parse(scanColumn[i]);
        var mass = double.Parse(massColumn[i]);
        var protName = proteinNameColumn[i];
        var protDesc = fastaDb.GetProteinDescription(protName);
        var firstResId = int.Parse(startColumn[i]);
        var lastResId = int.Parse(endColumn[i]);
        var score = double.Parse(scoreColumn[i]);
        var mod = modColumn[i];
        var qvalue = (qValColumn != null) ? qValColumn[i] : "0";

        // One formatted write per row; values are formatted with the writer's format provider,
        // as the individual Write calls were.
        writer.Write(
            "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\n",
            scanNum, sequence, mod, mass, protName, protDesc, firstResId, lastResId, score, qvalue);
    }
}
/// <summary>
/// Groups the scan numbers of an IcTda result excerpt by their Composition value and asserts
/// the number of distinct compositions. Rows are read in file order and reading stops at the
/// first row whose QValue exceeds <paramref name="qValueThreshold"/>.
/// </summary>
/// <param name="qValueThreshold">Largest QValue a row may have to be included.</param>
/// <param name="expectedNumCompositions">Expected number of distinct compositions collected.</param>
public void TestClusterCentricSearch(double qValueThreshold, int expectedNumCompositions)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var resultFilePath = Path.Combine(Utils.DEFAULT_SPEC_FILES_FOLDER, "QC_Shew_Intact_26Sep14_Bane_C2Column3_Excerpt_IcTda.tsv");
    var resultFile = Utils.GetTestFile(methodName, resultFilePath);

    var tsvReader = new TsvFileParser(resultFile.FullName);
    var scanNumbers = tsvReader.GetData("Scan").Select(text => Convert.ToInt32((string)text)).ToArray();
    var compositions = tsvReader.GetData("Composition").ToArray();
    var qValues = tsvReader.GetData("QValue").Select(text => Convert.ToDouble(text)).ToArray();

    var scansByComposition = new Dictionary<string, IList<int>>();
    for (var row = 0; row < qValues.Length; row++)
    {
        // Stop (not skip) at the first row over the threshold; later rows are never examined.
        if (qValues[row] > qValueThreshold)
        {
            break;
        }

        // Get-or-create the scan list for this composition, then record the scan.
        IList<int> scanList;
        if (!scansByComposition.TryGetValue(compositions[row], out scanList))
        {
            scanList = new List<int>();
            scansByComposition.Add(compositions[row], scanList);
        }

        scanList.Add(scanNumbers[row]);
    }

    var numCompositions = scansByComposition.Keys.Count;
    Console.Write("NumCompositions: {0}", numCompositions);
    Assert.AreEqual(expectedNumCompositions, numCompositions);
}
/// <summary>
/// Re-scores the identifications of an MS-Align result file and writes each scored row,
/// followed by its scores, to <paramref name="outputFilePath"/> as tab-separated text.
/// The first output line is the original header row plus the score names.
/// Rows are omitted when no sequence can be extracted from the Peptide column, when the
/// extracted sequence contains "(" (modified ids are currently ignored), or when the
/// scorer returns no scores.
/// </summary>
/// <param name="msAlignFilePath">Tab-separated MS-Align result file to read.</param>
/// <param name="outputFilePath">File to create with the re-scored rows.</param>
private void Rescore(string msAlignFilePath, string outputFilePath)
{
    var parser = new TsvFileParser(msAlignFilePath);
    var peptideColumn = parser.GetData("Peptide");
    var scanNums = parser.GetData("Scan(s)").Select(text => Convert.ToInt32(text)).ToArray();
    var charges = parser.GetData("Charge").Select(text => Convert.ToInt32(text)).ToArray();
    var rows = parser.GetRows();
    var headers = parser.GetHeaders();

    using (var writer = new StreamWriter(outputFilePath))
    {
        writer.WriteLine("{0}\t{1}", string.Join("\t", headers), IcScores.GetScoreNames());

        for (var rowIndex = 0; rowIndex < parser.NumData; rowIndex++)
        {
            var rowText = rows[rowIndex];
            var seqStr = SimpleStringProcessing.GetStringBetweenDots(peptideColumn[rowIndex]);

            // TODO: ids with modifications (sequence contains "(") are currently ignored
            var isScorable = seqStr != null && !seqStr.Contains("(");
            if (isScorable)
            {
                var composition = AASet.GetComposition(seqStr);
                var scores = _topDownScorer.GetScores(
                    AminoAcid.ProteinNTerm,
                    seqStr,
                    AminoAcid.ProteinCTerm,
                    composition,
                    charges[rowIndex],
                    scanNums[rowIndex]);

                if (scores != null)
                {
                    writer.WriteLine("{0}\t{1}", rowText, scores);
                }
            }
        }
    }
}
/// <summary>
/// Re-scores the identifications of an IC result file and writes them to
/// <paramref name="outputFilePath"/> as tab-separated text. The first output line is the
/// original header row plus the score names. Each data row is copied field by field, with
/// the field in the "Modifications" column replaced by "[" + scores.Modifications + "]",
/// and the scores are appended at the end of the line.
/// </summary>
/// <param name="icResultFilePath">Tab-separated IC result file to read.</param>
/// <param name="outputFilePath">File to create with the re-scored rows.</param>
private void Rescore(string icResultFilePath, string outputFilePath)
{
    var parser = new TsvFileParser(icResultFilePath);
    var sequences = parser.GetData("Sequence");
    var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
    var charges = parser.GetData("Charge").Select(c => Convert.ToInt32(c)).ToArray();
    var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();

    // Fetch the headers once; they serve both the column lookup and the output header line.
    var headers = parser.GetHeaders();
    var modIndex = headers.IndexOf("Modifications");
    var rows = parser.GetRows();

    using (var writer = new StreamWriter(outputFilePath))
    {
        writer.WriteLine("{0}\t{1}", string.Join("\t", headers), IcScores.GetScoreNames());

        for (var i = 0; i < parser.NumData; i++)
        {
            var row = rows[i];
            var scores = _topDownScorer.GetScores(
                AminoAcid.ProteinNTerm,
                sequences[i],
                AminoAcid.ProteinCTerm,
                compositions[i],
                charges[i],
                scanNums[i]);

            // NOTE(review): assumes GetScores can return null for a row, as the MS-Align
            // Rescore variant checks for; confirm against the scorer. Such a row is skipped
            // rather than failing on scores.Modifications after part of it has been written.
            if (scores == null)
            {
                continue;
            }

            var token = row.Split('\t');
            for (var j = 0; j < token.Length; j++)
            {
                if (j != modIndex)
                {
                    writer.Write(token[j] + "\t");
                }
                else
                {
                    // Replace the original modifications field with the re-scored value.
                    writer.Write("[" + scores.Modifications + "]" + "\t");
                }
            }

            writer.WriteLine(scores);
        }
    }
}
/// <summary>
/// Builds a Venn diagram from the peptide sets that GetPeptidesAboveQValueThreshold returns
/// for two IcTda result files (threshold 0.01) and prints, tab-separated, the number of
/// peptides only in the first set, in both sets, and only in the second set.
/// The test is ignored when either result file is not found.
/// </summary>
public void GenerateVennDiagrams()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    // DIA
    const string dir = @"H:\Research\DDAPlus\NTT2";

    // Alternative inputs, currently unused:
    //   dir + @"\20140701_yeast_DDA_01_IcTda.tsv"
    //   dir + @"\20140701_yeast_DDA_02_2_IcTda.tsv"
    const string resultPath1 = dir + @"\20140701_yeast_DDAp_binCharge_01_IcTda.tsv";
    const string resultPath2 = dir + @"\20140701_yeast_DDAp_binCharge_02_IcTda.tsv";

    // Assert.Ignore ends the test, so the first missing file (in this order) is the one reported.
    foreach (var resultPath in new[] { resultPath1, resultPath2 })
    {
        if (!File.Exists(resultPath))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultPath);
        }
    }

    var result1 = new TsvFileParser(resultPath1);
    var result2 = new TsvFileParser(resultPath2);

    const double pepQValueThreshold = 0.01;
    var peptides1 = result1.GetPeptidesAboveQValueThreshold(pepQValueThreshold);
    var peptides2 = result2.GetPeptidesAboveQValueThreshold(pepQValueThreshold);
    var vennDiagram = new VennDiagram<string>(peptides1, peptides2);

    // Exclusive counts are printed; the intersection is not added to either side.
    var onlyInFirst = vennDiagram.Set1Only.Count;
    var inBoth = vennDiagram.Intersection.Count;
    var onlyInSecond = vennDiagram.Set2Only.Count;
    Console.WriteLine("{0}\t{1}\t{2}", onlyInFirst, inBoth, onlyInSecond);
}
/// <summary>
/// Loads a score table from a tab-separated file. The result has one row per entry of
/// <c>_massBins</c> and <c>NumberOfBins</c> values per row: element [i][k] is parsed from
/// data row i of the column whose header is the text of k ("0", "1", ...).
/// </summary>
/// <param name="fname">Path of the score data file.</param>
/// <returns>The parsed table, indexed as [mass bin][bin].</returns>
/// <exception cref="FileNotFoundException">Thrown when <paramref name="fname"/> does not exist.</exception>
private double[][] LoadTable(string fname)
{
    if (!File.Exists(fname))
    {
        throw new FileNotFoundException("Missing score datafile: " + fname);
    }

    var parser = new TsvFileParser(fname);

    var table = new double[_massBins.Length][];
    for (var i = 0; i < _massBins.Length; i++)
    {
        table[i] = new double[NumberOfBins];
    }

    // Walk the file column by column so that each column is looked up once, rather than
    // once per mass bin as a row-major walk would require. The filled table is the same.
    for (var k = 0; k < NumberOfBins; k++)
    {
        var colData = parser.GetData(string.Format("{0}", k));
        for (var i = 0; i < _massBins.Length; i++)
        {
            table[i][k] = double.Parse(colData[i]);
        }
    }

    return table;
}
/// <summary>
/// Reads the bottom-up identification file <c>_fileName</c> and returns one
/// <see cref="SpectrumMatch"/> per distinct peptide. A row is skipped when its peptide
/// q-value exceeds <c>PepQValueThreshold</c> or when its peptide was already accepted from
/// an earlier row. When the formula column is present and has a value for the row, that
/// value is passed on to the match.
/// </summary>
/// <returns>The accepted spectrum matches, in file order.</returns>
/// <exception cref="FormatException">
/// Thrown when the scan, peptide, peptide q-value or precursor charge column is missing.
/// </exception>
public IList<SpectrumMatch> Read()
{
    var tsvFile = new TsvFileParser(_fileName);
    var precursorCharges = tsvFile.GetData(PrecursorChargeHeader);
    var scans = tsvFile.GetData(ScanHeader);
    var peptides = tsvFile.GetData(BottomUpPeptideHeader);
    var pepQValues = tsvFile.GetData(PepQValueHeader);
    var formulas = tsvFile.GetData(FormulaHeader);

    // Every column dereferenced below must be present; previously only the scan column was
    // checked and a file lacking any of the others failed with a NullReferenceException.
    var missingColumns = new List<string>();
    if (scans == null)
    {
        missingColumns.Add(ScanHeader);
    }
    if (peptides == null)
    {
        missingColumns.Add(BottomUpPeptideHeader);
    }
    if (pepQValues == null)
    {
        missingColumns.Add(PepQValueHeader);
    }
    if (precursorCharges == null)
    {
        missingColumns.Add(PrecursorChargeHeader);
    }
    if (missingColumns.Count > 0)
    {
        throw new FormatException(string.Format(
            "{0} is missing required column(s): {1}", _fileName, string.Join(", ", missingColumns)));
    }

    var specMatches = new List<SpectrumMatch>();
    var peptideSet = new HashSet<string>();

    for (var i = 0; i < peptides.Count; i++)
    {
        var peptide = peptides[i];

        // HashSet.Add returns false for a peptide that is already present, so a single call
        // both tests for and records it. It only runs for rows that pass the q-value filter.
        if (Convert.ToDouble(pepQValues[i]) > PepQValueThreshold || !peptideSet.Add(peptide))
        {
            continue;
        }

        var scanNum = Convert.ToInt32(scans[i]);
        var precursorCharge = Convert.ToInt32(precursorCharges[i]);

        // The formula column is optional; use the overload without it when no value is available.
        if (formulas != null && formulas[i] != null)
        {
            specMatches.Add(new SpectrumMatch(peptide, DataFileFormat.IcBottomUp, _lcms, scanNum, precursorCharge, _decoy, formulas[i]));
        }
        else
        {
            specMatches.Add(new SpectrumMatch(peptide, DataFileFormat.IcBottomUp, _lcms, scanNum, precursorCharge, _decoy));
        }
    }

    return specMatches;
}