Example #1
0
        /// <summary>
        /// Decides whether a target is excluded (filtered), using the export rules
        /// that apply to its concrete result type.
        /// </summary>
        /// <param name="t">Target to evaluate</param>
        /// <returns>
        /// False when <paramref name="t"/> is a <see cref="SequestResult"/>, <see cref="XTandemResult"/>
        /// or <see cref="MsgfPlusResult"/> that is to be exported; true in every other case,
        /// including targets that are none of those types.
        /// </returns>
        public bool ShouldFilter(Target t)
        {
            // Known shortcut: the options live in one general class, so the concrete
            // result type has to be probed here to select the matching export check.
            // Splitting the options into per-tool classes would remove this dispatch.
            var sequestHit = t as SequestResult;
            var xtandemHit = t as XTandemResult;
            var msgfHit    = t as MsgfPlusResult;

            bool exported;

            if (sequestHit != null)
            {
                exported = IsToBeExported(sequestHit);
            }
            else if (xtandemHit != null)
            {
                exported = IsToBeExported(xtandemHit);
            }
            else if (msgfHit != null)
            {
                exported = IsToBeExported(msgfHit);
            }
            else
            {
                // Anything that is not one of the supported result types is always filtered
                exported = false;
            }

            // A target that is to be exported must not be filtered, and vice versa
            return !exported;
        }
Example #2
0
        /// <summary>
        /// Checks an MSGF+ result against the FDR and spectral probability limits and
        /// against the export switch that matches its number of tryptic ends.
        /// </summary>
        /// <param name="result">MSGF+ result to test</param>
        /// <returns>
        /// False when either score exceeds its limit, or when the export switch for the
        /// result's number of tryptic ends (2, 1 or 0) is off; true otherwise.
        /// </returns>
        public bool IsToBeExported(MsgfPlusResult result)
        {
            // Score limits: a value above either threshold rejects the result outright
            if (result.Fdr > MsgfFDR || result.SpectralProbability > MsgfSpectralEValue)
            {
                return false;
            }

            // The cleavage state selects which export switch decides the outcome
            switch (result.NumTrypticEnds)
            {
                case 2:
                    return ExportTryptic;

                case 1:
                    return ExportPartiallyTryptic;

                case 0:
                    return ExportNonTryptic;

                default:
                    // Any other value is not restricted by the cleavage-state switches
                    return true;
            }
        }
        /// <summary>
        /// Determines if a target should be filtered or not.
        /// </summary>
        /// <param name="target">Target to evaluate; a null target is filtered</param>
        /// <returns>
        /// False only when <paramref name="target"/> is a <see cref="MsgfPlusResult"/> that has sequence data,
        /// does not exceed <c>Options.MaxModificationsForAlignment</c> modifications, and whose FDR and
        /// spectral probability do not exceed <c>Options.MsgfFDR</c> and <c>Options.MsgfSpectralEValue</c>;
        /// true otherwise.
        /// </returns>
        public bool ShouldFilter(Target target)
        {
            // Nothing to evaluate: filter it rather than fail on the dereference below
            if (target == null)
            {
                return(true);
            }

            Sequence sequence = target.SequenceData;

            if (sequence == null)
            {
                return(true);
            }

            // Targets carrying more modifications than the configured maximum are excluded
            if (sequence.ModificationCount > Options.MaxModificationsForAlignment)
            {
                return(true);
            }

            // The score thresholds below only apply to MSGF+ results; any other target type is filtered
            MsgfPlusResult result = target as MsgfPlusResult;

            if (result == null)
            {
                return(true);
            }

            if (result.Fdr > Options.MsgfFDR)
            {
                return(true);
            }

            if (result.SpectralProbability > Options.MsgfSpectralEValue)
            {
                return(true);
            }

            return(false);
        }
Example #4
0
        /// <summary>
        /// Map the results of a MZIdentML read to MSGF+
        /// </summary>
        /// <remarks>
        /// Spectrum items that do not pass the import filter, or that have no peptide evidence, are skipped.
        /// Progress is reported every 500 items.
        /// </remarks>
        /// <param name="results">List that receives one <see cref="MsgfPlusResult"/> for each retained spectrum item</param>
        /// <param name="path">Path to MZIdentML file</param>
        private void MapToMsgf(List <MsgfPlusResult> results, string path)
        {
            var filter = new MsgfPlusTargetFilter(ReaderOptions);

            var cleavageStateCalculator = new clsPeptideCleavageStateCalculator();

            var i     = 0;
            var total = m_specItems.Count;

            // Go through each Spectrum ID and map it to an MSGF+ result
            foreach (var item in m_specItems)
            {
                i++;
                if (i % 500 == 0)
                {
                    UpdateProgress((100 * ((float)i / total)));
                }

                var psm = item.Value;

                // Skip this PSM if it doesn't pass the import filters
                // Note that qValue is basically FDR
                double qValue   = psm.QValue;
                double specProb = psm.SpecEv;

                if (filter.ShouldFilter(qValue, specProb))
                {
                    continue;
                }

                // Without peptide evidence there are no flanking residues or proteins to map
                if (psm.PepEvidence.Count == 0)
                {
                    continue;
                }

                // The first evidence entry supplies the flanking residues and the reference protein
                var evidence        = psm.PepEvidence[0];
                var peptideSequence = psm.Peptide.Sequence;

                var result = new MsgfPlusResult
                {
                    AnalysisId               = i,
                    Charge                   = Convert.ToInt16(psm.Charge),
                    CleanPeptide             = peptideSequence,
                    SeqWithNumericMods       = null,
                    MonoisotopicMass         = clsPeptideMassCalculator.ConvoluteMass(psm.CalMz, psm.Charge, 0),
                    ObservedMonoisotopicMass = clsPeptideMassCalculator.ConvoluteMass(psm.ExperimentalMz, psm.Charge, 0),
                    MultiProteinCount        = Convert.ToInt16(psm.PepEvCount),
                    Scan              = psm.ScanNum,
                    Sequence          = evidence.Pre + "." + peptideSequence + "." + evidence.Post,
                    Mz                = 0,
                    SpecProb          = specProb,
                    DelM              = 0,
                    ModificationCount = Convert.ToInt16(psm.Peptide.Mods.Count)
                };

                // Populate some mass related items
                result.DelM    = result.ObservedMonoisotopicMass - result.MonoisotopicMass;
                result.DelMPpm = clsPeptideMassCalculator.MassToPPM(result.DelM, result.ObservedMonoisotopicMass);
                // We could compute m/z:
                //     Mz = clsPeptideMassCalculator.ConvoluteMass(result.ObservedMonoisotopicMass, 0, result.Charge);
                // But it's stored in the mzid file, so we'll use that
                result.Mz = psm.ExperimentalMz;

                StoreDatasetInfo(result, path);

                result.DataSet.Tool = LcmsIdentificationTool.MZIdentML;

                // Populate items specific to the MSGF+ results (stored as mzid)

                result.Reference = evidence.DbSeq.Accession;

                var eCleavageState = cleavageStateCalculator.ComputeCleavageState(peptideSequence, evidence.Pre, evidence.Post);
                result.NumTrypticEnds = clsPeptideCleavageStateCalculator.CleavageStateToShort(eCleavageState);

                result.DeNovoScore    = psm.DeNovoScore;
                result.MsgfScore      = psm.RawScore;
                result.SpecEValue     = psm.SpecEv;
                result.RankSpecEValue = psm.Rank;

                result.EValue            = psm.EValue;
                result.QValue            = qValue;
                result.DiscriminantValue = qValue;
                result.PepQValue         = psm.PepQValue;

                result.IsotopeError = psm.IsoError;

                if (result.ModificationCount > 0)
                {
                    // Index of the next residue that has not yet been copied into the annotated sequences
                    var j = 0;

                    var numModSeq  = evidence.Pre + ".";
                    var encodedSeq = numModSeq;
                    foreach (var mod in psm.Peptide.Mods)
                    {
                        // Look the modification up once and reuse it for both formula and title
                        var uniMod = UniModData.ModList[mod.Value.Tag];

                        var ptm = new PostTranslationalModification
                        {
                            Location = mod.Key,
                            Mass     = mod.Value.Mass,
                            Formula  = uniMod.Formula.ToString(),
                            Name     = uniMod.Title
                        };
                        result.Ptms.Add(ptm);

                        // Copy the residues that precede this modification. The copy is clamped to the
                        // sequence length so that a location beyond the last residue (for example a
                        // C-terminal modification, if locations follow the mzIdentML convention of
                        // peptide length + 1) cannot index past the end of the sequence.
                        for (; j < ptm.Location && j < peptideSequence.Length; j++)
                        {
                            numModSeq  = numModSeq + peptideSequence[j];
                            encodedSeq = encodedSeq + peptideSequence[j];
                        }

                        // Write the sign explicitly and the magnitude separately; appending the signed
                        // mass after a "-" would produce a doubled sign for negative masses
                        var sign = (ptm.Mass > 0) ? "+" : "-";

                        numModSeq  += sign + Math.Abs(ptm.Mass);
                        encodedSeq += "[" + sign + ptm.Formula + "]";
                    }

                    // Append the residues that follow the last modification
                    for (; j < peptideSequence.Length; j++)
                    {
                        numModSeq   = numModSeq + peptideSequence[j];
                        encodedSeq += peptideSequence[j];
                    }
                    numModSeq  += "." + evidence.Post;
                    encodedSeq += "." + evidence.Post;
                    result.SeqWithNumericMods        = numModSeq;
                    result.EncodedNonNumericSequence = encodedSeq;
                }
                else
                {
                    result.SeqWithNumericMods        = result.Sequence;
                    result.EncodedNonNumericSequence = result.Sequence;
                }

                result.PeptideInfo = new TargetPeptideInfo
                {
                    Peptide                = result.Sequence,
                    CleanPeptide           = result.CleanPeptide,
                    PeptideWithNumericMods = result.SeqWithNumericMods
                };

                result.SeqInfoMonoisotopicMass = result.MonoisotopicMass;
                result.ModificationDescription = null;

                foreach (var thing in psm.PepEvidence)
                {
                    var protein = new ProteinInformation
                    {
                        ProteinName  = thing.DbSeq.Accession,
                        ResidueStart = thing.Start,
                        ResidueEnd   = thing.End
                    };
                    // NOTE(review): the first evidence entry, not the current one, is passed here for
                    // every protein; confirm whether that is intended
                    ComputeTerminusState(evidence, result.NumTrypticEnds, protein);
                    result.Proteins.Add(protein);
                }

                if (result.ModificationCount > 0)
                {
                    foreach (var mod in psm.Peptide.Mods)
                    {
                        // TODO: Confirm that this is valid math (MEM thinks it may not be)
                        result.SeqInfoMonoisotopicMass += mod.Value.Mass;

                        result.ModificationDescription += mod.Value.Tag + ":" + mod.Key + "  ";
                    }
                }

                results.Add(result);
            }
        }
Example #5
0
        /// <summary>
        /// Read and process a MSGF+ PHRP file
        /// </summary>
        /// <remarks>
        /// PSMs that do not pass the import filter, or whose SeqID is 0, are skipped.
        /// If an abort is requested, reading stops and the results gathered so far are still
        /// passed to ComputeNets and returned.
        /// </remarks>
        /// <param name="path">MSGF+ file to read</param>
        /// <returns>
        /// Data set named after the file (without its extension) that holds the retained results
        /// </returns>
        public override LcmsDataSet Read(string path)
        {
            var results = new List <MsgfPlusResult>();
            var filter  = new MsgfPlusTargetFilter(ReaderOptions);

            // Get the Evidences using PHRPReader which looks at the path that was passed in to determine the data type
            int resultsProcessed = 0;
            var reader           = InitializeReader(path);

            while (reader.MoveNext())
            {
                resultsProcessed++;
                if (resultsProcessed % 500 == 0)
                {
                    UpdateProgress(reader.PercentComplete);
                }

                if (AbortRequested)
                {
                    break;
                }

                // Skip this PSM if it doesn't pass the import filters
                // Note that qValue is basically FDR
                // Fall back to the FDR column when no QValue column is present (default of -1 returned)
                double qValue = reader.CurrentPSM.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_QValue, -1);
                if (qValue < 0)
                {
                    qValue = reader.CurrentPSM.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_FDR, 0);
                }

                double specProb = 0;
                if (!string.IsNullOrEmpty(reader.CurrentPSM.MSGFSpecProb))
                {
                    // The value comes from a result file, not from user input, so parse it with the
                    // invariant culture; the current culture could use a different decimal separator
                    specProb = Convert.ToDouble(reader.CurrentPSM.MSGFSpecProb, System.Globalization.CultureInfo.InvariantCulture);
                }

                if (filter.ShouldFilter(qValue, specProb))
                {
                    continue;
                }

                reader.FinalizeCurrentPSM();

                if (reader.CurrentPSM.SeqID == 0)
                {
                    continue;
                }

                var result = new MsgfPlusResult
                {
                    AnalysisId = reader.CurrentPSM.ResultID
                };

                StorePsmData(result, reader, specProb);

                StoreDatasetInfo(result, reader, path);
                result.DataSet.Tool = LcmsIdentificationTool.MsgfPlus;

                // Populate items specific to MSGF+
                result.Reference      = reader.CurrentPSM.ProteinFirst;
                result.NumTrypticEnds = reader.CurrentPSM.NumTrypticTerminii;

                result.DeNovoScore = reader.CurrentPSM.GetScoreInt(clsPHRPParserMSGFDB.DATA_COLUMN_DeNovoScore, 0);
                result.MsgfScore   = reader.CurrentPSM.GetScoreInt(clsPHRPParserMSGFDB.DATA_COLUMN_MSGFScore, 0);

                // Prefer the SpecEValue column; when it is absent use the SpecProb column and its rank instead
                result.SpecEValue = reader.CurrentPSM.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_MSGFDB_SpecEValue, -1);
                if (result.SpecEValue < 0)
                {
                    result.SpecEValue     = reader.CurrentPSM.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_MSGFDB_SpecProb, 0);
                    result.RankSpecEValue = reader.CurrentPSM.GetScoreInt(clsPHRPParserMSGFDB.DATA_COLUMN_Rank_MSGFDB_SpecProb, 0);
                }
                else
                {
                    result.RankSpecEValue = reader.CurrentPSM.GetScoreInt(clsPHRPParserMSGFDB.DATA_COLUMN_Rank_MSGFDB_SpecEValue, 0);
                }

                result.EValue = reader.CurrentPSM.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_EValue, 0);

                result.QValue            = qValue;
                result.DiscriminantValue = qValue;

                // Same fallback as for qValue: use the PepFDR column when PepQValue is absent
                result.PepQValue = reader.CurrentPSM.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_PepQValue, -1);
                if (result.PepQValue < 0)
                {
                    result.PepQValue = reader.CurrentPSM.GetScoreDbl(clsPHRPParserMSGFDB.DATA_COLUMN_PepFDR, 0);
                }

                result.IsotopeError = reader.CurrentPSM.GetScoreInt(clsPHRPParserMSGFDB.DATA_COLUMN_Isotope_Error, 0);

                results.Add(result);
            }

            ComputeNets(results);

            return(new LcmsDataSet(Path.GetFileNameWithoutExtension(path), LcmsIdentificationTool.MsgfPlus, results));
        }