/// <summary>
/// Decides whether a target must be filtered out (i.e. not exported).
/// </summary>
/// <param name="t">Target to evaluate; the decision depends on its runtime type.</param>
/// <returns>
/// False when <paramref name="t"/> is a <c>SequestResult</c>, <c>XTandemResult</c> or
/// <c>MsgfPlusResult</c> that the matching <c>IsToBeExported</c> overload accepts;
/// true otherwise, including for any other kind of target.
/// </returns>
public bool ShouldFilter(Target t)
{
    // Dispatching on the runtime type is admittedly clumsy. A cleaner design would split
    // the general options class into concrete, tool-specific classes, but that redesign
    // has been deferred.

    // A target that is to be exported must not be filtered, hence the negation below.
    var asSequest = t as SequestResult;
    if (asSequest != null)
    {
        return !IsToBeExported(asSequest);
    }

    var asXTandem = t as XTandemResult;
    if (asXTandem != null)
    {
        return !IsToBeExported(asXTandem);
    }

    // Anything that is not one of the supported result types is always filtered.
    var asMsgfPlus = t as MsgfPlusResult;
    return asMsgfPlus == null || !IsToBeExported(asMsgfPlus);
}
/// <summary>
/// Determines whether an MSGF+ result passes the export filters.
/// </summary>
/// <param name="result">MSGF+ result to evaluate.</param>
/// <returns>
/// False when the result's FDR exceeds <c>MsgfFDR</c>, when its spectral probability exceeds
/// <c>MsgfSpectralEValue</c>, or when its number of tryptic ends (2, 1 or 0) is switched off
/// by the corresponding export flag; true otherwise.
/// </returns>
public bool IsToBeExported(MsgfPlusResult result)
{
    // Score thresholds come first: a result that is too weak is never exported
    if (result.Fdr > MsgfFDR || result.SpectralProbability > MsgfSpectralEValue)
    {
        return false;
    }

    // The cleavage state selects which export flag decides the outcome
    switch (result.NumTrypticEnds)
    {
        case 2:
            return ExportTryptic;

        case 1:
            return ExportPartiallyTryptic;

        case 0:
            return ExportNonTryptic;

        default:
            // Any other value is not restricted by a flag
            return true;
    }
}
/// <summary>
/// Determines if a feature should be filtered or not.
/// </summary>
/// <param name="target">Target to evaluate.</param>
/// <returns>
/// True when the target has no sequence data, carries more modifications than
/// <c>Options.MaxModificationsForAlignment</c>, is not a <c>MsgfPlusResult</c>, or exceeds
/// the <c>Options.MsgfFDR</c> / <c>Options.MsgfSpectralEValue</c> thresholds; false otherwise.
/// </returns>
public bool ShouldFilter(Target target)
{
    Sequence sequenceData = target.SequenceData;

    // Alignment only uses peptides whose modification count is within the configured maximum
    if (sequenceData == null
        || sequenceData.ModificationCount > Options.MaxModificationsForAlignment)
    {
        return true;
    }

    // Only MSGF+ results are accepted, and only when both score thresholds are met
    MsgfPlusResult msgfPlus = target as MsgfPlusResult;
    return msgfPlus == null
           || msgfPlus.Fdr > Options.MsgfFDR
           || msgfPlus.SpectralProbability > Options.MsgfSpectralEValue;
}
/// <summary>
/// Map the results of a MZIdentML read to MSGF+
/// </summary>
/// <remarks>
/// Every entry of <c>m_specItems</c> that passes the import filter and has at least one
/// peptide evidence is converted into a <c>MsgfPlusResult</c> and appended to
/// <paramref name="results"/>. Progress is reported every 500 entries.
/// </remarks>
/// <param name="results">Object to populate with the results of the Mapping</param>
/// <param name="path">Path to MZIdentML file</param>
private void MapToMsgf(List<MsgfPlusResult> results, string path)
{
    var filter = new MsgfPlusTargetFilter(ReaderOptions);
    var cleavageStateCalculator = new clsPeptideCleavageStateCalculator();

    var i = 0;
    var total = m_specItems.Count;

    // Go through each Spectrum ID and map it to an MSGF+ result
    foreach (var item in m_specItems)
    {
        i++;
        if (i % 500 == 0)
        {
            UpdateProgress((100 * ((float)i / total)));
        }

        var spectrumMatch = item.Value;

        // Skip this PSM if it doesn't pass the import filters
        // Note that qValue is basically FDR
        double qValue = spectrumMatch.QValue;
        double specProb = spectrumMatch.SpecEv;

        if (filter.ShouldFilter(qValue, specProb))
        {
            continue;
        }

        if (spectrumMatch.PepEvidence.Count == 0)
        {
            continue;
        }

        // The first evidence supplies the flanking residues, the reference and the cleavage state
        var evidence = spectrumMatch.PepEvidence[0];
        var cleanSequence = spectrumMatch.Peptide.Sequence;

        var result = new MsgfPlusResult
        {
            AnalysisId = i,
            Charge = Convert.ToInt16(spectrumMatch.Charge),
            CleanPeptide = cleanSequence,
            SeqWithNumericMods = null,
            MonoisotopicMass = clsPeptideMassCalculator.ConvoluteMass(spectrumMatch.CalMz, spectrumMatch.Charge, 0),
            ObservedMonoisotopicMass = clsPeptideMassCalculator.ConvoluteMass(spectrumMatch.ExperimentalMz, spectrumMatch.Charge, 0),
            MultiProteinCount = Convert.ToInt16(spectrumMatch.PepEvCount),
            Scan = spectrumMatch.ScanNum,
            Sequence = evidence.Pre + "." + cleanSequence + "." + evidence.Post,
            Mz = 0,
            SpecProb = specProb,
            DelM = 0,
            ModificationCount = Convert.ToInt16(spectrumMatch.Peptide.Mods.Count)
        };

        // Populate some mass related items
        result.DelM = result.ObservedMonoisotopicMass - result.MonoisotopicMass;
        result.DelMPpm = clsPeptideMassCalculator.MassToPPM(result.DelM, result.ObservedMonoisotopicMass);

        // We could compute m/z:
        //     Mz = clsPeptideMassCalculator.ConvoluteMass(result.ObservedMonoisotopicMass, 0, result.Charge);
        // But it's stored in the mzid file, so we'll use that
        result.Mz = spectrumMatch.ExperimentalMz;

        StoreDatasetInfo(result, path);
        result.DataSet.Tool = LcmsIdentificationTool.MZIdentML;

        // Populate items specific to the MSGF+ results (stored as mzid)
        result.Reference = evidence.DbSeq.Accession;

        var eCleavageState = cleavageStateCalculator.ComputeCleavageState(cleanSequence, evidence.Pre, evidence.Post);
        result.NumTrypticEnds = clsPeptideCleavageStateCalculator.CleavageStateToShort(eCleavageState);

        result.DeNovoScore = spectrumMatch.DeNovoScore;
        result.MsgfScore = spectrumMatch.RawScore;
        result.SpecEValue = spectrumMatch.SpecEv;
        result.RankSpecEValue = spectrumMatch.Rank;
        result.EValue = spectrumMatch.EValue;
        result.QValue = qValue;
        result.DiscriminantValue = qValue;
        result.PepQValue = spectrumMatch.PepQValue;
        result.IsotopeError = spectrumMatch.IsoError;

        if (result.ModificationCount > 0)
        {
            // Build two annotated sequences in parallel:
            //   numModSeq  - residues with signed numeric mod masses, e.g. K.PEP+15.99TIDE.R
            //   encodedSeq - residues with signed bracketed formulas
            // j is the index of the next residue that has not been copied yet
            var j = 0;
            var numModSeq = evidence.Pre + ".";
            var encodedSeq = numModSeq;

            foreach (var mod in spectrumMatch.Peptide.Mods)
            {
                var uniMod = UniModData.ModList[mod.Value.Tag];
                var ptm = new PostTranslationalModification
                {
                    Location = mod.Key,
                    Mass = mod.Value.Mass,
                    Formula = uniMod.Formula.ToString(),
                    Name = uniMod.Title
                };
                result.Ptms.Add(ptm);

                // Copy the residues that precede this modification. The extra length bound
                // keeps a location beyond the last residue from indexing past the end of the
                // sequence (presumably a C-terminal modification reported at Length + 1 -
                // TODO confirm against the mzid parser).
                for (; j < ptm.Location && j < cleanSequence.Length; j++)
                {
                    numModSeq += cleanSequence[j];
                    encodedSeq += cleanSequence[j];
                }

                // Emit the sign explicitly and the magnitude separately; appending a negative
                // mass after "-" would otherwise produce a double minus ("--17.03")
                var sign = (ptm.Mass > 0) ? "+" : "-";
                numModSeq += sign + Math.Abs(ptm.Mass);
                encodedSeq += "[" + sign + ptm.Formula + "]";
            }

            // Copy whatever residues follow the last modification
            for (; j < cleanSequence.Length; j++)
            {
                numModSeq += cleanSequence[j];
                encodedSeq += cleanSequence[j];
            }

            numModSeq += "." + evidence.Post;
            encodedSeq += "." + evidence.Post;

            result.SeqWithNumericMods = numModSeq;
            result.EncodedNonNumericSequence = encodedSeq;
        }
        else
        {
            result.SeqWithNumericMods = result.Sequence;
            result.EncodedNonNumericSequence = result.Sequence;
        }

        result.PeptideInfo = new TargetPeptideInfo
        {
            Peptide = result.Sequence,
            CleanPeptide = result.CleanPeptide,
            PeptideWithNumericMods = result.SeqWithNumericMods
        };

        result.SeqInfoMonoisotopicMass = result.MonoisotopicMass;
        result.ModificationDescription = null;

        foreach (var proteinEvidence in spectrumMatch.PepEvidence)
        {
            var protein = new ProteinInformation
            {
                ProteinName = proteinEvidence.DbSeq.Accession,
                ResidueStart = proteinEvidence.Start,
                ResidueEnd = proteinEvidence.End
            };

            // NOTE(review): the terminus state of every protein is computed from the first
            // evidence (the one NumTrypticEnds was derived from), not from this protein's
            // own evidence - confirm that this is intended
            ComputeTerminusState(evidence, result.NumTrypticEnds, protein);
            result.Proteins.Add(protein);
        }

        if (result.ModificationCount > 0)
        {
            foreach (var mod in spectrumMatch.Peptide.Mods)
            {
                // TODO: Confirm that this is valid math (MEM thinks it may not be)
                result.SeqInfoMonoisotopicMass += mod.Value.Mass;
                result.ModificationDescription += mod.Value.Tag + ":" + mod.Key + " ";
            }
        }

        results.Add(result);
    }
}
/// <summary>
/// Reads a MSGF+ PHRP file and converts every PSM that passes the import filters
/// into a MSGF+ result.
/// </summary>
/// <param name="path">MSGF+ file to read</param>
/// <returns>
/// Dataset named after the file (without its extension) that holds the accepted results.
/// If an abort is requested, the results gathered up to that point are returned.
/// </returns>
public override LcmsDataSet Read(string path)
{
    var results = new List<MsgfPlusResult>();
    var filter = new MsgfPlusTargetFilter(ReaderOptions);

    // PHRPReader inspects the supplied path to work out which data type it is reading
    var reader = InitializeReader(path);

    // psmNumber is the 1-based count of PSMs visited so far
    for (int psmNumber = 1; reader.MoveNext(); psmNumber++)
    {
        if (psmNumber % 500 == 0)
        {
            UpdateProgress(reader.PercentComplete);
        }

        if (AbortRequested)
        {
            break;
        }

        // Apply the import filters before doing any of the more expensive work.
        // Note that qValue is basically FDR; the FDR column is the fallback when no
        // QValue column is present.
        var candidate = reader.CurrentPSM;

        double qValue = candidate.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_QValue, -1);
        if (qValue < 0)
        {
            qValue = candidate.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_FDR, 0);
        }

        var specProbText = candidate.MSGFSpecProb;
        double specProb = string.IsNullOrEmpty(specProbText) ? 0 : Convert.ToDouble(specProbText);

        if (filter.ShouldFilter(qValue, specProb))
        {
            continue;
        }

        reader.FinalizeCurrentPSM();

        if (reader.CurrentPSM.SeqID == 0)
        {
            continue;
        }

        var result = new MsgfPlusResult
        {
            AnalysisId = reader.CurrentPSM.ResultID
        };

        StorePsmData(result, reader, specProb);
        StoreDatasetInfo(result, reader, path);
        result.DataSet.Tool = LcmsIdentificationTool.MsgfPlus;

        // Populate items specific to MSGF+
        var psm = reader.CurrentPSM;

        result.Reference = psm.ProteinFirst;
        result.NumTrypticEnds = psm.NumTrypticTerminii;
        result.DeNovoScore = psm.GetScoreInt(clsPHRPParserMSGFDB.DATA_COLUMN_DeNovoScore, 0);
        result.MsgfScore = psm.GetScoreInt(clsPHRPParserMSGFDB.DATA_COLUMN_MSGFScore, 0);

        // Prefer the SpecEValue columns; fall back to the SpecProb columns when they are missing
        result.SpecEValue = psm.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_MSGFDB_SpecEValue, -1);
        if (result.SpecEValue < 0)
        {
            result.SpecEValue = psm.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_MSGFDB_SpecProb, 0);
            result.RankSpecEValue = psm.GetScoreInt(clsPHRPParserMSGFDB.DATA_COLUMN_Rank_MSGFDB_SpecProb, 0);
        }
        else
        {
            result.RankSpecEValue = psm.GetScoreInt(clsPHRPParserMSGFDB.DATA_COLUMN_Rank_MSGFDB_SpecEValue, 0);
        }

        result.EValue = psm.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_EValue, 0);
        result.QValue = qValue;
        result.DiscriminantValue = qValue;

        // Same fallback scheme for the peptide-level value: PepQValue first, then PepFDR
        result.PepQValue = psm.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_PepQValue, -1);
        if (result.PepQValue < 0)
        {
            result.PepQValue = psm.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_PepFDR, 0);
        }

        result.IsotopeError = psm.GetScoreInt(clsPHRPParserMSGFDB.DATA_COLUMN_Isotope_Error, 0);

        results.Add(result);
    }

    ComputeNets(results);

    var datasetName = Path.GetFileNameWithoutExtension(path);
    return new LcmsDataSet(datasetName, LcmsIdentificationTool.MsgfPlus, results);
}