/// <summary>
/// Reads an MS-GF+ tab-separated result file and builds a <see cref="ProteinSpectrumMatch"/>
/// for each accepted row.
/// </summary>
/// <remarks>
/// <para>
/// When consecutive rows share the same scan number only the first of them is examined.
/// The "previous scan" marker is updated before the FDR filter runs, so if that first row
/// is rejected the following rows of the same scan are skipped as well.
/// </para>
/// <para>
/// Rows whose <c>QValue</c> exceeds <c>FdrCutoff</c> are dropped. Numeric fields are parsed
/// with the invariant culture so the outcome does not depend on the current thread culture.
/// </para>
/// </remarks>
/// <param name="msgfResultPath">Path of the result file handed to <c>TsvFileParser</c>.</param>
/// <param name="maxPrsm">
/// Reading stops as soon as the list holds at least this many matches. The limit is tested
/// after a match has been added.
/// </param>
/// <returns>The accepted matches in file order.</returns>
public List<ProteinSpectrumMatch> ReadMsGfPlusResult(string msgfResultPath, int maxPrsm)
{
    // Fully qualified so that no additional using directive is required.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var parser = new TsvFileParser(msgfResultPath);
    var prsmList = new List<ProteinSpectrumMatch>();

    // Look every column up once instead of once per row.
    var peptideColumn = parser.GetData("Peptide");
    var scanColumn = parser.GetData("Scan");
    var precursorMzColumn = parser.GetData("PrecursorMZ");
    var proteinColumn = parser.GetData("Protein");
    var scoreColumn = parser.GetData("MSGFScore");
    var chargeColumn = parser.GetData("Charge");
    var qValueColumn = parser.GetData("QValue");

    // This file format supplies no protein description or residue range.
    const string protDesc = "";
    const int firstResId = 0;
    const int lastResId = 0;

    var prevScanNum = -1;
    for (var i = 0; i < parser.NumData; i++)
    {
        var sequence = peptideColumn[i];
        var scanNum = int.Parse(scanColumn[i], invariant);
        if (prevScanNum == scanNum)
        {
            continue;
        }

        prevScanNum = scanNum;

        var mz = double.Parse(precursorMzColumn[i], invariant);
        var protName = proteinColumn[i];
        var score = double.Parse(scoreColumn[i], invariant);
        var charge = int.Parse(chargeColumn[i], invariant);
        var fdr = double.Parse(qValueColumn[i], invariant);
        if (fdr > FdrCutoff)
        {
            continue;
        }

        // Only build the sequence text for rows that survive the filter.
        var seq = Sequence.GetSequenceFromMsGfPlusPeptideStr(sequence);
        var sequenceText = GetSequenceText(seq);
        var mass = (mz - Constants.Proton) * charge;

        var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsGfPlus)
        {
            SequenceText = sequenceText,
        };

        prsmList.Add(prsm);

        if (prsmList.Count >= maxPrsm)
        {
            break;
        }
    }

    return prsmList;
}
/// <summary>
/// Reads an MS-Align result table (tab-separated) and builds a
/// <see cref="ProteinSpectrumMatch"/> for each accepted row.
/// </summary>
/// <remarks>
/// <para>
/// The <c>Protein_name</c> field is split at its first space: the part before it becomes the
/// protein name and the part after it the description. When the field contains no space the
/// whole value is used for both.
/// </para>
/// <para>
/// Rows whose <c>FDR</c> exceeds <c>FdrCutoff</c> are dropped. Numeric fields are parsed with
/// the invariant culture so the outcome does not depend on the current thread culture.
/// </para>
/// </remarks>
/// <param name="msAlignResultTablePath">Path of the result table handed to <c>TsvFileParser</c>.</param>
/// <param name="maxPrsm">
/// Reading stops as soon as the list holds at least this many matches. The limit is tested
/// after a match has been added.
/// </param>
/// <returns>The accepted matches in file order.</returns>
public List<ProteinSpectrumMatch> ReadMsAlignResult(string msAlignResultTablePath, int maxPrsm)
{
    // Fully qualified so that no additional using directive is required.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var parser = new TsvFileParser(msAlignResultTablePath);
    var prsmList = new List<ProteinSpectrumMatch>();

    // Look every column up once instead of once per row.
    var peptideColumn = parser.GetData("Peptide");
    var scanColumn = parser.GetData("Scan(s)");
    var massColumn = parser.GetData("Precursor_mass");
    var proteinColumn = parser.GetData("Protein_name");
    var firstResidueColumn = parser.GetData("First_residue");
    var lastResidueColumn = parser.GetData("Last_residue");
    var scoreColumn = parser.GetData("#matched_fragment_ions");
    var chargeColumn = parser.GetData("Charge");
    var evalueColumn = parser.GetData("E-value");
    var fdrColumn = parser.GetData("FDR");

    for (var i = 0; i < parser.NumData; i++)
    {
        var sequence = peptideColumn[i];
        var scanNum = int.Parse(scanColumn[i], invariant);
        var mass = double.Parse(massColumn[i], invariant);

        var protNameDesc = proteinColumn[i];
        var k = protNameDesc.IndexOf(' ');
        var protName = (k < 0) ? protNameDesc : protNameDesc.Substring(0, k);
        var protDesc = (k < 0) ? protNameDesc : protNameDesc.Substring(k + 1);

        var firstResId = int.Parse(firstResidueColumn[i], invariant);
        var lastResId = int.Parse(lastResidueColumn[i], invariant);
        var score = double.Parse(scoreColumn[i], invariant);
        var charge = int.Parse(chargeColumn[i], invariant);
        var evalue = double.Parse(evalueColumn[i], invariant);
        var fdr = double.Parse(fdrColumn[i], invariant);
        if (fdr > FdrCutoff)
        {
            continue;
        }

        var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsAlign)
        {
            // The display text is the raw "Peptide" value, i.e. the same string as the sequence.
            SequenceText = sequence,
            SpectralEvalue = evalue,
        };

        prsmList.Add(prsm);

        if (prsmList.Count >= maxPrsm)
        {
            break;
        }
    }

    return prsmList;
}
/// <summary>
/// Converts the entries produced by <c>DatabaseSearchResultData.ReadResultsFromFile</c> into
/// <see cref="ProteinSpectrumMatch"/> objects, keeping only those that pass the score and
/// FDR filters.
/// </summary>
/// <param name="msPathFinderResultPath">Path of the result file to read.</param>
/// <param name="maxPrsm">
/// Enumeration stops as soon as the list holds at least this many matches. The limit is
/// tested after a match has been added.
/// </param>
/// <param name="minScore">Entries with fewer matched fragments than this are skipped.</param>
/// <param name="maxScore">Entries with more matched fragments than this are skipped.</param>
/// <returns>The accepted matches in the order they were read.</returns>
public List<ProteinSpectrumMatch> ReadMsPathFinderResult(string msPathFinderResultPath, int maxPrsm, double minScore = 3, double maxScore = int.MaxValue)
{
    var accepted = new List<ProteinSpectrumMatch>();
    var entries = DatabaseSearchResultData.ReadResultsFromFile(msPathFinderResultPath);

    foreach (var entry in entries)
    {
        // Score window: the matched-fragment count must lie within [minScore, maxScore].
        var outsideScoreWindow = entry.NumMatchedFragments < minScore || maxScore < entry.NumMatchedFragments;
        if (outsideScoreWindow)
        {
            continue;
        }

        // The q-value filter only applies when the entry carries target-decoy scores.
        if (entry.HasTdaScores)
        {
            if (entry.QValue > FdrCutoff)
            {
                continue;
            }
        }

        var match = new ProteinSpectrumMatch(
            entry.Sequence,
            entry.ScanNum,
            entry.Mass,
            entry.Charge,
            entry.ProteinName,
            entry.ProteinDescription,
            entry.Start,
            entry.End,
            entry.NumMatchedFragments,
            ProteinSpectrumMatch.SearchTool.MsPathFinder)
        {
            SequenceText = GetSequenceText(entry.Sequence, entry.Modifications),
            Modifications = entry.Modifications,
            Pre = entry.Pre,
            Post = entry.Post,
            ProteinLength = entry.ProteinLength,
            SpectralEvalue = entry.SpecEValue,
        };

        accepted.Add(match);

        if (accepted.Count >= maxPrsm)
        {
            break;
        }
    }

    return accepted;
}
/// <summary>
/// Reads an MSPathFinder tab-separated result file directly through <c>TsvFileParser</c> and
/// builds a <see cref="ProteinSpectrumMatch"/> for each accepted row.
/// </summary>
/// <remarks>
/// <para>
/// The score is taken from the <c>#MatchedFragments</c> column, falling back to <c>Score</c>
/// when the former is absent. The <c>QValue</c> and <c>SpecEValue</c> columns are optional:
/// without <c>QValue</c> no FDR filtering is applied, and without <c>SpecEValue</c> the
/// spectral E-value is set to 0.
/// </para>
/// <para>
/// Numeric fields are parsed with the invariant culture so the outcome does not depend on
/// the current thread culture.
/// </para>
/// </remarks>
/// <param name="msPathFinderResultPath">Path of the result file handed to <c>TsvFileParser</c>.</param>
/// <param name="maxPrsm">
/// Reading stops as soon as the list holds at least this many matches. The limit is tested
/// after a match has been added.
/// </param>
/// <param name="minScore">Rows with a score below this value are skipped.</param>
/// <param name="maxScore">Rows with a score above this value are skipped.</param>
/// <returns>The accepted matches in file order.</returns>
public List<ProteinSpectrumMatch> ReadMsPathFinderResultOld(string msPathFinderResultPath, int maxPrsm, double minScore = 3, double maxScore = int.MaxValue)
{
    // Fully qualified so that no additional using directive is required.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var parser = new TsvFileParser(msPathFinderResultPath);
    var prsmList = new List<ProteinSpectrumMatch>();

    // Optional / alternative columns.
    var scoreColumn = parser.GetData("#MatchedFragments") ?? parser.GetData("Score");
    var qValColumn = parser.GetData("QValue");
    var evalueColumn = parser.GetData("SpecEValue");

    // Look the remaining columns up once instead of once per row.
    var sequenceColumn = parser.GetData("Sequence");
    var scanColumn = parser.GetData("Scan");
    var massColumn = parser.GetData("Mass");
    var proteinNameColumn = parser.GetData("ProteinName");
    var proteinDescColumn = parser.GetData("ProteinDesc");
    var chargeColumn = parser.GetData("Charge");
    var startColumn = parser.GetData("Start");
    var endColumn = parser.GetData("End");
    var modificationsColumn = parser.GetData("Modifications");
    var preColumn = parser.GetData("Pre");
    var postColumn = parser.GetData("Post");
    var proteinLengthColumn = parser.GetData("ProteinLength");

    for (var i = 0; i < parser.NumData; i++)
    {
        var sequence = sequenceColumn[i];
        var scanNum = int.Parse(scanColumn[i], invariant);
        var mass = double.Parse(massColumn[i], invariant);
        var protName = proteinNameColumn[i];
        var protDesc = proteinDescColumn[i];
        var charge = int.Parse(chargeColumn[i], invariant);
        var firstResId = int.Parse(startColumn[i], invariant);
        var lastResId = int.Parse(endColumn[i], invariant);
        var score = double.Parse(scoreColumn[i], invariant);
        var mod = modificationsColumn[i];
        var evalue = (evalueColumn != null) ? double.Parse(evalueColumn[i], invariant) : 0.0;
        var pre = preColumn[i];
        var post = postColumn[i];
        var proteinLen = int.Parse(proteinLengthColumn[i], invariant);

        if (score < minScore || score > maxScore)
        {
            continue;
        }

        if (qValColumn != null)
        {
            var fdr = double.Parse(qValColumn[i], invariant);
            if (fdr > FdrCutoff)
            {
                continue;
            }
        }

        var sequenceText = GetSequenceText(sequence, mod);

        var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsPathFinder)
        {
            SequenceText = sequenceText,
            Modifications = mod,
            Pre = pre,
            Post = post,
            ProteinLength = proteinLen,
            SpectralEvalue = evalue,
        };

        prsmList.Add(prsm);

        if (prsmList.Count >= maxPrsm)
        {
            break;
        }
    }

    return prsmList;
}