/// <summary>
/// Loads protein-spectrum matches from a search-result file by dispatching to the reader
/// that corresponds to the search tool which produced the file.
/// </summary>
/// <param name="path">Path of the result file.</param>
/// <param name="tool">
/// Search tool that produced the file. When left as <c>Unknown</c>, the tool is inferred from the
/// file-name suffix: "IcTda.tsv" (MSPathFinder), "MSAlign_ResultTable.txt" (MSAlign) or
/// "msgfdb_syn.txt" (MS-GF+).
/// </param>
/// <param name="maxPrsm">Passed to the reader; reading stops once this many matches have been collected.</param>
/// <returns>
/// The matches returned by the selected reader, or <c>null</c> when the tool is <c>Unknown</c> and
/// cannot be inferred from <paramref name="path"/> (or is a tool this method has no reader for).
/// </returns>
public List<ProteinSpectrumMatch> LoadIdentificationResult(string path, ProteinSpectrumMatch.SearchTool tool = ProteinSpectrumMatch.SearchTool.Unknown, int maxPrsm = int.MaxValue)
{
    if (tool == ProteinSpectrumMatch.SearchTool.Unknown)
    {
        // File-name suffixes are identifiers, not linguistic text, so compare them ordinally
        // instead of with the current culture.
        if (path.EndsWith("IcTda.tsv", StringComparison.Ordinal))
        {
            tool = ProteinSpectrumMatch.SearchTool.MsPathFinder;
        }
        else if (path.EndsWith("MSAlign_ResultTable.txt", StringComparison.Ordinal))
        {
            tool = ProteinSpectrumMatch.SearchTool.MsAlign;
        }
        else if (path.EndsWith("msgfdb_syn.txt", StringComparison.Ordinal))
        {
            tool = ProteinSpectrumMatch.SearchTool.MsGfPlus;
        }
    }

    switch (tool)
    {
        case ProteinSpectrumMatch.SearchTool.MsAlign:
            return ReadMsAlignResult(path, maxPrsm);
        case ProteinSpectrumMatch.SearchTool.MsPathFinder:
            return ReadMsPathFinderResult(path, maxPrsm);
        case ProteinSpectrumMatch.SearchTool.MsGfPlus:
            return ReadMsGfPlusResult(path, maxPrsm);
        default:
            // Kept as null (not an empty list) so existing callers that test for null still work.
            return null;
    }
}
/// <summary>
/// Reads an MSAlign result table and returns the matches whose FDR does not exceed <c>FdrCutoff</c>.
/// </summary>
/// <param name="msAlignResultTablePath">Path of the MSAlign result table (parsed with <c>TsvFileParser</c>).</param>
/// <param name="maxPrsm">Reading stops once this many matches have been collected.</param>
/// <returns>The accepted matches, in file order.</returns>
public List<ProteinSpectrumMatch> ReadMsAlignResult(string msAlignResultTablePath, int maxPrsm)
{
    var parser = new TsvFileParser(msAlignResultTablePath);
    var prsmList = new List<ProteinSpectrumMatch>();

    // Column lookups do not depend on the row, so resolve each column once instead of once per row.
    var peptideColumn = parser.GetData("Peptide");
    var scanColumn = parser.GetData("Scan(s)");
    var massColumn = parser.GetData("Precursor_mass");
    var proteinColumn = parser.GetData("Protein_name");
    var firstResidueColumn = parser.GetData("First_residue");
    var lastResidueColumn = parser.GetData("Last_residue");
    var matchedIonsColumn = parser.GetData("#matched_fragment_ions");
    var chargeColumn = parser.GetData("Charge");
    var evalueColumn = parser.GetData("E-value");
    var fdrColumn = parser.GetData("FDR");

    // NOTE(review): numeric fields are parsed with the current culture; confirm this matches
    // the number format the producing tool writes.
    for (var i = 0; i < parser.NumData; i++)
    {
        // The "Peptide" field serves as both the sequence and the display text of the match.
        var sequence = peptideColumn[i];
        var scanNum = int.Parse(scanColumn[i]);
        var mass = double.Parse(massColumn[i]);

        // "Protein_name" holds "<name> <description>"; split at the first space.
        // When there is no space, the whole field is used for both the name and the description.
        var protNameDesc = proteinColumn[i];
        var separatorIndex = protNameDesc.IndexOf(' ');
        var protName = (separatorIndex < 0) ? protNameDesc : protNameDesc.Substring(0, separatorIndex);
        var protDesc = (separatorIndex < 0) ? protNameDesc : protNameDesc.Substring(separatorIndex + 1);

        var firstResId = int.Parse(firstResidueColumn[i]);
        var lastResId = int.Parse(lastResidueColumn[i]);
        var score = double.Parse(matchedIonsColumn[i]);
        var charge = int.Parse(chargeColumn[i]);
        var evalue = double.Parse(evalueColumn[i]);
        var fdr = Double.Parse(fdrColumn[i]);

        if (fdr > FdrCutoff)
        {
            continue;
        }

        var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsAlign)
        {
            SequenceText = sequence,
            SpectralEvalue = evalue,
        };

        prsmList.Add(prsm);

        if (prsmList.Count >= maxPrsm)
        {
            break;
        }
    }

    return prsmList;
}
/// <summary>
/// Collapses a list of matches to at most one match per scan number, keeping for each scan
/// the match with the smallest <c>SpectralEvalue</c>.
/// </summary>
/// <param name="targetList">Matches to merge; may contain several matches for the same scan.</param>
/// <returns>
/// One match per distinct scan number, ordered by ascending scan number.
/// An empty list when <paramref name="targetList"/> is null or empty.
/// </returns>
private List<ProteinSpectrumMatch> MergePrsm(List<ProteinSpectrumMatch> targetList)
{
    // Max() over an empty sequence throws, so handle the trivial case up front.
    if (targetList == null || targetList.Count == 0)
    {
        return new List<ProteinSpectrumMatch>();
    }

    // Keyed by scan number: memory is proportional to the number of distinct scans rather than
    // to the largest scan number, and scan numbers need not be non-negative.
    var bestByScan = new Dictionary<int, ProteinSpectrumMatch>();

    foreach (var prsm in targetList)
    {
        ProteinSpectrumMatch current;

        // Strict '<' keeps the first match seen when two matches of a scan tie on E-value.
        if (!bestByScan.TryGetValue(prsm.ScanNum, out current) || prsm.SpectralEvalue < current.SpectralEvalue)
        {
            bestByScan[prsm.ScanNum] = prsm;
        }
    }

    return bestByScan
        .OrderBy(entry => entry.Key)
        .Select(entry => entry.Value)
        .ToList();
}
/// <summary>
/// Reads an MS-GF+ result file and returns the matches whose Q-value does not exceed <c>FdrCutoff</c>.
/// Only the first row of each run of consecutive rows with the same scan number is considered.
/// </summary>
/// <param name="msgfResultPath">Path of the MS-GF+ result file (parsed with <c>TsvFileParser</c>).</param>
/// <param name="maxPrsm">Reading stops once this many matches have been collected.</param>
/// <returns>The accepted matches, in file order.</returns>
public List<ProteinSpectrumMatch> ReadMsGfPlusResult(string msgfResultPath, int maxPrsm)
{
    var parser = new TsvFileParser(msgfResultPath);
    var prsmList = new List<ProteinSpectrumMatch>();

    // Column lookups do not depend on the row, so resolve each column once instead of once per row.
    var peptideColumn = parser.GetData("Peptide");
    var scanColumn = parser.GetData("Scan");
    var precursorMzColumn = parser.GetData("PrecursorMZ");
    var proteinColumn = parser.GetData("Protein");
    var scoreColumn = parser.GetData("MSGFScore");
    var chargeColumn = parser.GetData("Charge");
    var qValueColumn = parser.GetData("QValue");

    // This reader sets no protein description or residue positions on the matches it builds.
    const string protDesc = "";
    const int firstResId = 0;
    const int lastResId = 0;

    // NOTE(review): numeric fields are parsed with the current culture; confirm this matches
    // the number format the producing tool writes.
    var prevScanNum = -1;
    for (var i = 0; i < parser.NumData; i++)
    {
        var sequence = peptideColumn[i];
        var scanNum = int.Parse(scanColumn[i]);

        // Skip repeated rows of the same scan. The marker is updated before the FDR filter,
        // so a scan whose first row is rejected contributes no match at all.
        if (prevScanNum == scanNum)
        {
            continue;
        }
        prevScanNum = scanNum;

        var mz = double.Parse(precursorMzColumn[i]);
        var protName = proteinColumn[i];
        var score = double.Parse(scoreColumn[i]);
        var charge = int.Parse(chargeColumn[i]);

        var seq = Sequence.GetSequenceFromMsGfPlusPeptideStr(sequence);
        var sequenceText = GetSequenceText(seq);

        // Convert the precursor m/z into a neutral mass.
        var mass = (mz - Constants.Proton) * charge;

        var fdr = Double.Parse(qValueColumn[i]);
        if (fdr > FdrCutoff)
        {
            continue;
        }

        var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsGfPlus)
        {
            SequenceText = sequenceText,
        };

        prsmList.Add(prsm);

        if (prsmList.Count >= maxPrsm)
        {
            break;
        }
    }

    return prsmList;
}
/// <summary>
/// Reads an MSPathFinder result file and returns the matches that pass the score range and,
/// when a "QValue" column is present, whose Q-value does not exceed <c>FdrCutoff</c>.
/// </summary>
/// <param name="msPathFinderResultPath">Path of the MSPathFinder result file (parsed with <c>TsvFileParser</c>).</param>
/// <param name="maxPrsm">Reading stops once this many matches have been collected.</param>
/// <param name="minScore">Rows with a score below this value are skipped.</param>
/// <param name="maxScore">Rows with a score above this value are skipped.</param>
/// <returns>The accepted matches, in file order.</returns>
public List<ProteinSpectrumMatch> ReadMsPathFinderResult(string msPathFinderResultPath, int maxPrsm, double minScore = 3, double maxScore = int.MaxValue)
{
    var parser = new TsvFileParser(msPathFinderResultPath);
    var prsmList = new List<ProteinSpectrumMatch>();

    // The score is taken from "#MatchedFragments" when that column exists, otherwise from "Score".
    var scoreColumn = parser.GetData("#MatchedFragments") ?? parser.GetData("Score");

    // Optional columns: the Q-value filter and the E-value are only applied when present.
    var qValColumn = parser.GetData("QValue");
    var evalueColumn = parser.GetData("SpecEValue");

    // Column lookups do not depend on the row, so resolve each column once instead of once per row.
    var sequenceColumn = parser.GetData("Sequence");
    var scanColumn = parser.GetData("Scan");
    var massColumn = parser.GetData("Mass");
    var proteinNameColumn = parser.GetData("ProteinName");
    var proteinDescColumn = parser.GetData("ProteinDesc");
    var chargeColumn = parser.GetData("Charge");
    var startColumn = parser.GetData("Start");
    var endColumn = parser.GetData("End");
    var modificationsColumn = parser.GetData("Modifications");
    var preColumn = parser.GetData("Pre");
    var postColumn = parser.GetData("Post");
    var proteinLengthColumn = parser.GetData("ProteinLength");

    // NOTE(review): numeric fields are parsed with the current culture; confirm this matches
    // the number format the producing tool writes.
    for (var i = 0; i < parser.NumData; i++)
    {
        var sequence = sequenceColumn[i];
        var scanNum = int.Parse(scanColumn[i]);
        var mass = double.Parse(massColumn[i]);
        var protName = proteinNameColumn[i];
        var protDesc = proteinDescColumn[i];
        var charge = int.Parse(chargeColumn[i]);
        var firstResId = int.Parse(startColumn[i]);
        var lastResId = int.Parse(endColumn[i]);
        var score = double.Parse(scoreColumn[i]);
        var mod = modificationsColumn[i];

        // The E-value defaults to 0 when the file has no "SpecEValue" column.
        var evalue = (evalueColumn != null) ? double.Parse(evalueColumn[i]) : 0;

        var pre = preColumn[i];
        var post = postColumn[i];
        var proteinLen = int.Parse(proteinLengthColumn[i]);

        if (score < minScore || score > maxScore)
        {
            continue;
        }

        if (qValColumn != null)
        {
            var fdr = double.Parse(qValColumn[i]);
            if (fdr > FdrCutoff)
            {
                continue;
            }
        }

        var sequenceText = GetSequenceText(sequence, mod);

        var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsPathFinder)
        {
            SequenceText = sequenceText,
            Modifications = mod,
            Pre = pre,
            Post = post,
            ProteinLength = proteinLen,
            SpectralEvalue = evalue,
        };

        prsmList.Add(prsm);

        if (prsmList.Count >= maxPrsm)
        {
            break;
        }
    }

    return prsmList;
}