/// <summary>
/// Builds a match from one tab-separated result line, locating each field through
/// the column indices supplied by <paramref name="header"/>.
/// </summary>
/// <param name="resultStr">A single tab-delimited result line.</param>
/// <param name="header">
/// Supplies the expected number of columns and the index of every column that is read.
/// </param>
/// <remarks>
/// If the number of tab-separated fields differs from <c>header.NumColumns</c>, only
/// <c>IsValid</c> is assigned (to <c>false</c>) and every other property keeps its default.
/// Numeric fields are parsed with the invariant culture so that values written with a
/// '.' decimal separator are read the same way regardless of the machine's regional
/// settings. A numeric field that cannot be converted makes the underlying
/// <c>Convert</c> call throw.
/// </remarks>
public MsGfMatch(string resultStr, MsGfPlusHeaderInformation header)
{
    var token = resultStr.Split('\t');
    if (token.Length != header.NumColumns)
    {
        IsValid = false;
        return;
    }

    // Result files are machine-generated; never parse them with the UI culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    SpecFile = token[header.SpecFileColNum];
    Peptide = token[header.PeptideColNum];

    // Formula, QValue and PepQValue are only read when their column index is positive.
    // NOTE(review): presumably a non-positive index marks a column absent from the file - confirm.
    if (header.FormulaColNum > 0)
    {
        Formula = Composition.Parse(token[header.FormulaColNum]);
    }

    ScanNum = Convert.ToInt32(token[header.ScanNumColNum], invariant);
    Charge = Convert.ToInt32(token[header.ChargeColNum], invariant);
    Protein = token[header.ProteinColNum];
    DeNovoScore = Convert.ToInt32(token[header.DeNovoScoreColNum], invariant);
    MsgfScore = Convert.ToInt32(token[header.MsgfScoreColNum], invariant);
    SpecEValue = Convert.ToDouble(token[header.SpecEValueColNum], invariant);

    if (header.QValueColNum > 0)
    {
        QValue = Convert.ToDouble(token[header.QValueColNum], invariant);
    }

    if (header.PepQValueColNum > 0)
    {
        PepQValue = Convert.ToDouble(token[header.PepQValueColNum], invariant);
    }

    IsValid = true;
}
/// <summary>
/// Reads a result file and collects its matches into a sorted list.
/// </summary>
/// <param name="resultFilePath">Path of the result file to read line by line.</param>
/// <remarks>
/// The first line starting with '#' is used as the header; every other line is parsed
/// as a match. Lines that do not parse into a valid match are ignored. Of consecutive
/// valid matches sharing a scan number only the first is considered, and that match is
/// dropped when its protein starts with <c>FastaDatabase.DecoyProteinPrefix</c>.
/// NOTE(review): a data line that appears before any header line is parsed with a null
/// header - confirm input files always begin with the header.
/// </remarks>
public MsGfResults(string resultFilePath)
{
    _msGfMatches = new List<MsGfMatch>();
    MsGfPlusHeaderInformation headerInfo = null;
    var prevScanNum = -1;

    foreach (var line in File.ReadLines(resultFilePath))
    {
        if (headerInfo == null && line.StartsWith("#"))
        {
            headerInfo = new MsGfPlusHeaderInformation(line);
            continue;
        }

        var match = new MsGfMatch(line, headerInfo);

        // Reject unparsable lines before looking at ScanNum: an invalid match never had
        // its ScanNum assigned, so letting it update prevScanNum would corrupt the
        // consecutive-duplicate check for the lines that follow.
        if (!match.IsValid)
        {
            continue;
        }

        if (match.ScanNum == prevScanNum)
        {
            continue;
        }

        // Recorded before the decoy test, so a decoy first hit still consumes its scan.
        prevScanNum = match.ScanNum;

        if (match.Protein.StartsWith(FastaDatabase.DecoyProteinPrefix))
        {
            continue;
        }

        _msGfMatches.Add(match);
    }

    _msGfMatches.Sort();
}
/// <summary>
/// Filters the results in <c>MsGfResultPath</c>, keeps the best match per peptide,
/// writes them with their q-values to <paramref name="outputFilePath"/> and counts the
/// confident non-decoy identifications.
/// </summary>
/// <param name="outputFilePath">Path of the tab-separated file to create.</param>
/// <returns>
/// The number of written matches whose protein does not start with <c>DecoyPrefix</c>
/// and whose q-value is at most 0.01.
/// </returns>
/// <remarks>
/// A match is kept only if it parses, its <c>SpecEValue</c> does not exceed
/// <c>SpecEValueThreshold</c> and <c>IsValid(match)</c> accepts it. For each peptide the
/// match with the lowest <c>SpecEValue</c> is retained and its <c>NumMatches</c> holds the
/// number of kept matches seen for that peptide.
/// NOTE(review): the precursor m/z column is computed from <c>match.Formula</c>, which is
/// only populated when the input has a formula column - confirm it is always present.
/// </remarks>
public int PostProcessing(string outputFilePath)
{
    // Parse MS-GF+ results, keeping the best-scoring match for every peptide.
    var pepToResults = new Dictionary<string, MsGfMatch>();
    MsGfPlusHeaderInformation headerInfo = null;

    foreach (var line in File.ReadLines(MsGfResultPath))
    {
        if (line.StartsWith("#"))
        {
            headerInfo = new MsGfPlusHeaderInformation(line);
            continue;
        }

        var match = new MsGfMatch(line, headerInfo);
        if (!match.IsValid || match.SpecEValue > SpecEValueThreshold || !IsValid(match))
        {
            continue;
        }

        MsGfMatch prevMatch;
        if (!pepToResults.TryGetValue(match.Peptide, out prevMatch))
        {
            match.NumMatches = 1;
            pepToResults[match.Peptide] = match;
        }
        else if (match.SpecEValue < prevMatch.SpecEValue)
        {
            // The new match replaces the stored one and must count itself as well as
            // everything tallied so far; carrying over only the previous count would
            // lose one occurrence every time a better match shows up.
            match.NumMatches = prevMatch.NumMatches + 1;
            pepToResults[match.Peptide] = match;
        }
        else
        {
            ++prevMatch.NumMatches;
        }
    }

    var filteredPsms = new List<MsGfMatch>(pepToResults.Values);
    filteredPsms.Sort();

    // Compute FDR; qValue[i] belongs to filteredPsms[i].
    var qValue = GetQValues(filteredPsms);

    var numId = 0;
    using (var writer = new StreamWriter(outputFilePath))
    {
        writer.WriteLine("#SpecFile\tPeptide\tScanNum\tPrecursorMz\tCharge\tProtein\tNumMatches\tDeNovoScore\tMSGFScore\tSpecEValue\tPepQValue");

        for (var i = 0; i < filteredPsms.Count; i++)
        {
            var match = filteredPsms[i];

            writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}",
                match.SpecFile,
                match.Peptide,
                match.ScanNum,
                new Ion(match.Formula, match.Charge).GetMonoIsotopicMz(),
                match.Charge,
                match.Protein,
                match.NumMatches,
                match.DeNovoScore,
                match.MsgfScore,
                match.SpecEValue,
                qValue[i]);

            // Decoys are written to the file but never counted as identifications.
            if (!match.Protein.StartsWith(DecoyPrefix) && qValue[i] <= 0.01)
            {
                ++numId;
            }
        }
    }

    return numId;
}