/// <summary>
/// Creates a parser for a synopsis file.
/// </summary>
/// <remarks>
/// The first line of the file is read and split on tabs to locate the
/// "Peptide" column; afterwards a <see cref="clsPHRPReader"/> is opened on
/// the same path and its parser is cached.
/// </remarks>
/// <param name="synopsisFilePath">Path to the tab-delimited synopsis file</param>
/// <param name="fragLadderOptions">Fragment ladder options; the modification list of this instance is cleared here</param>
/// <exception cref="InvalidDataException">The file does not contain a header line</exception>
public PHRPReaderParser(string synopsisFilePath, FragmentLadderOptions fragLadderOptions)
{
    m_fragLadderOptions = fragLadderOptions;
    m_fragLadderOptions.modificationList.Clear();

    using (var reader = new StreamReader(synopsisFilePath))
    {
        // ReadLine returns null at end of stream (i.e. for an empty file);
        // report that explicitly instead of failing with a NullReferenceException.
        var headerLine = reader.ReadLine();
        if (headerLine == null)
        {
            throw new InvalidDataException("Synopsis file does not contain a header line: " + synopsisFilePath);
        }

        m_firstLine = headerLine.Split('\t');
    }

    // Remember the first column named "Peptide"; when no such column exists
    // the index keeps whatever value it had before.
    for (int i = 0; i < m_firstLine.Length; i++)
    {
        if (m_firstLine[i] == "Peptide")
        {
            m_peptideColumnIndex = i;
            break;
        }
    }

    var oStartupOptions = new clsPHRPStartupOptions
    {
        LoadModsAndSeqInfo = true,
        LoadMSGFResults = true,
        LoadScanStatsData = false,
        MaxProteinsPerPSM = 100
    };

    m_reader = new clsPHRPReader(synopsisFilePath, oStartupOptions);
    //m_reader.FastReadMode = true;
    m_reader.SkipDuplicatePSMs = true;
    m_parser = m_reader.PHRPParser;
}
/// <summary>
/// Reads the PHRP file at <c>filePath</c> on a background task and returns
/// one identification for every protein of every PSM.
/// </summary>
/// <param name="modIgnoreList">Not used by this implementation</param>
/// <param name="progress">Not used by this implementation; no progress is reported</param>
/// <returns>The identifications that were read</returns>
/// <exception cref="Exception">The PHRP reader reported an error message right after construction</exception>
public async Task<IEnumerable<PrSm>> ReadAsync(IEnumerable<string> modIgnoreList = null, IProgress<double> progress = null)
{
    var oStartupOptions = new clsPHRPStartupOptions { LoadModsAndSeqInfo = true };
    var phrpReader = new clsPHRPReader(filePath, oStartupOptions);

    if (!string.IsNullOrEmpty(phrpReader.ErrorMessage))
    {
        throw new Exception(phrpReader.ErrorMessage);
    }

    var identifications = await Task.Run(
        () =>
        {
            var ids = new List<PrSm>();

            while (phrpReader.MoveNext())
            {
                phrpReader.FinalizeCurrentPSM();
                var psm = phrpReader.CurrentPSM;
                var proteins = psm.Proteins;

                // The sequence is parsed once per PSM and shared by all of its proteins
                var parsedSequence = ParseSequence(psm.PeptideCleanSequence, psm.ModifiedResidues);

                foreach (var protein in proteins)
                {
                    var prsm = new PrSm
                    {
                        Heavy = false,
                        ProteinName = protein,
                        ProteinDesc = string.Empty,
                        Charge = psm.Charge,
                        Sequence = parsedSequence,
                        Scan = psm.ScanNumber,
                        // The score comes from a data file, so it is parsed with the
                        // invariant culture rather than the culture of the current thread.
                        Score = Convert.ToDouble(psm.MSGFSpecProb, System.Globalization.CultureInfo.InvariantCulture),
                        UseGolfScoring = true,
                        QValue = 0,
                    };

                    prsm.SetSequence(GetSequenceText(parsedSequence), parsedSequence);
                    ids.Add(prsm);
                }
            }

            return ids;
        });

    return identifications;
}
/// <summary>
/// Adds one <see cref="ProteinInformation"/> entry to the result for every
/// protein detail of the reader's current PSM.
/// </summary>
/// <param name="reader">Reader whose current PSM supplies the protein details</param>
/// <param name="result">Evidence whose protein list receives the new entries</param>
protected static void StoreProteinInfo(clsPHRPReader reader, Evidence result)
{
    var proteinDetails = reader.CurrentPSM.ProteinDetails;

    foreach (var entry in proteinDetails)
    {
        var details = entry.Value;

        var info = new ProteinInformation();
        info.ProteinName = details.ProteinName;
        info.CleavageState = details.CleavageState;
        info.TerminusState = details.TerminusState;
        info.ResidueStart = details.ResidueStart;
        info.ResidueEnd = details.ResidueEnd;

        result.Proteins.Add(info);
    }
}
/// <summary>
/// Clears the abort flag, reports the start of initialization and creates a
/// configured PHRP reader for the given file.
/// </summary>
/// <param name="path">Path of the file handed to the PHRP reader</param>
/// <returns>A new reader with duplicate-PSM skipping and fast read mode enabled</returns>
protected clsPHRPReader InitializeReader(string path)
{
    AbortRequested = false;

    var startupOptions = new clsPHRPStartupOptions();
    startupOptions.LoadModsAndSeqInfo = true;
    startupOptions.LoadMSGFResults = true;
    startupOptions.LoadScanStatsData = false;
    startupOptions.MaxProteinsPerPSM = 100;

    UpdateProgress(0, "Initializing reader");

    var phrpReader = new clsPHRPReader(path, startupOptions);
    phrpReader.SkipDuplicatePSMs = true;
    phrpReader.FastReadMode = true;

    return phrpReader;
}
/// <summary>
/// Copies the data of the reader's current PSM onto the result: charge,
/// sequences, masses, mass errors, proteins, modifications and an encoded
/// sequence in which each modification is written as a bracketed tag.
/// </summary>
/// <param name="result">Evidence that receives the values</param>
/// <param name="reader">Reader whose current PSM is the source of the values</param>
/// <param name="specProb">Value stored as the result's SpecProb</param>
protected void StorePsmData(Evidence result, clsPHRPReader reader, double specProb)
{
    var psm = reader.CurrentPSM;

    // Mass errors come from a data file, so they are parsed with the
    // invariant culture rather than the culture of the current thread.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    result.Charge = psm.Charge;
    result.CleanPeptide = psm.PeptideCleanSequence;
    result.SeqWithNumericMods = psm.PeptideWithNumericMods;
    result.MonoisotopicMass = psm.PeptideMonoisotopicMass;
    result.ObservedMonoisotopicMass = psm.PrecursorNeutralMass;
    result.MultiProteinCount = (short)psm.Proteins.Count;
    result.Scan = psm.ScanNumber;
    result.Sequence = psm.Peptide;
    result.Mz = clsPeptideMassCalculator.ConvoluteMass(psm.PrecursorNeutralMass, 0, psm.Charge);
    result.SpecProb = specProb;
    result.DelM = Convert.ToDouble(psm.MassErrorDa, invariant);
    result.ModificationCount = (short)psm.ModifiedResidues.Count;

    result.PeptideInfo = new TargetPeptideInfo
    {
        Peptide = result.Sequence,
        CleanPeptide = result.CleanPeptide,
        PeptideWithNumericMods = result.SeqWithNumericMods
    };

    // A missing (null or empty) ppm error leaves DelMPpm untouched
    if (!string.IsNullOrEmpty(psm.MassErrorPPM))
    {
        result.DelMPpm = Convert.ToDouble(psm.MassErrorPPM, invariant);
    }

    result.SeqInfoMonoisotopicMass = result.MonoisotopicMass;

    StoreProteinInfo(reader, result);

    if (result.ModificationCount == 0)
    {
        // Without modifications the encoded sequence is the peptide itself
        result.EncodedNonNumericSequence = result.Sequence;
        return;
    }

    foreach (var info in psm.ModifiedResidues)
    {
        result.ModificationDescription += info.ModDefinition.MassCorrectionTag + ":" + info.ResidueLocInPeptide + " ";

        var ptm = new PostTranslationalModification
        {
            Location = info.ResidueLocInPeptide,
            Mass = info.ModDefinition.ModificationMass,
            Formula = info.ModDefinition.MassCorrectionTag,
            Name = info.ModDefinition.MassCorrectionTag
        };

        result.Ptms.Add(ptm);
    }

    // Encoded form: <first char of Sequence> "." <clean residues with tags> "." <last char of Sequence>
    var encoded = new System.Text.StringBuilder();
    encoded.Append(result.Sequence[0]).Append('.');

    // NOTE(review): the residue cursor only moves forward, so this presumably
    // expects result.Ptms to be in ascending Location order - confirm.
    var residueIndex = 0;
    foreach (var ptm in result.Ptms)
    {
        // Copy the clean residues with index below Location, then emit the tag,
        // so the tag directly follows the Location-th residue (counting from 1).
        for (; residueIndex < ptm.Location; residueIndex++)
        {
            encoded.Append(result.CleanPeptide[residueIndex]);
        }

        encoded.Append('[').Append((ptm.Mass > 0) ? "+" : "-").Append(ptm.Formula).Append(']');
    }

    // Remaining residues after the last tag
    for (; residueIndex < result.CleanPeptide.Length; residueIndex++)
    {
        encoded.Append(result.CleanPeptide[residueIndex]);
    }

    encoded.Append('.').Append(result.Sequence[result.Sequence.Length - 1]);

    result.EncodedNonNumericSequence = encoded.ToString();
}
/// <summary>
/// Convenience overload that takes the dataset name from the reader and
/// forwards to the overload accepting a dataset name.
/// </summary>
/// <param name="result">Evidence passed through to the forwarded call</param>
/// <param name="reader">Reader whose DatasetName is used</param>
/// <param name="dataFilePath">Data file path passed through to the forwarded call</param>
protected void StoreDatasetInfo(Evidence result, clsPHRPReader reader, string dataFilePath)
{
    var datasetName = reader.DatasetName;

    StoreDatasetInfo(result, datasetName, dataFilePath);
}