Пример #1
0
        public IEnumerable <Tuple <Peptide, int> > DigestProteomeOnTheFlyFast(List <Protein> proteins, bool allowSNP, Queries AllQueries)
        {
            foreach (Peptide peptide in ProteinSearcher.ProteinDigest(options, proteins, allowSNP))
            {
                //int firstIndex = AllQueries.BinarySearch(MassTolerance.MzFloor(peptide.MonoisotopicMass, options.precursorMassTolerance));
                //if (firstIndex >= 0 && firstIndex < AllQueries.Count)
                //    yield return new Tuple<Peptide, int>(peptide, firstIndex);

                //foreach (Peptide peptide in ProteinSearcher.ProteinDigestNoEnzyme(dbOptions, proteins, AllQueries))
                //if (!TargetPeptides.Contains(peptide.BaseSequence))
                //{
                foreach (Peptide modPeptide in peptide.GetVariablyModifiedPeptides(options.variableModifications, options.maximumVariableModificationIsoforms))
                {
                    modPeptide.SetFixedModifications(options.fixedModifications);
                    int firstIndex = AllQueries.BinarySearch(MassTolerance.MzFloor(modPeptide.MonoisotopicMass, options.precursorMassTolerance));
                    if (firstIndex >= 0 && firstIndex < AllQueries.Count)
                    {
                        yield return(new Tuple <Peptide, int>(modPeptide, firstIndex));
                    }
                }

                //TODO check if this favors targets over decoys since proteins are sorted target->deco
                //    TargetPeptides.Add(peptide.BaseSequence);
                //}
            }
        }
Пример #2
0
        public IEnumerable <Tuple <Peptide, int> > DigestProteomeOnTheFly(List <Protein> proteins, bool allowSNP, Queries AllQueries)
        {
            //HashSet<string> TargetPeptides = new HashSet<string>();
            //Dictionary<string, int> TargetPeptides = new Dictionary<string, int>();
            //Digest proteins and store peptides in a Dictionnary
            //Does not fit in memory -> 360 Go ....
            //dicOfPeptideSequences = new Dictionary<string, List<Protein>>();
            //double minimumMonoisotopicPeakOffset = dbOptions.precursorMonoisotopicPeakCorrection ? dbOptions.minimumPrecursorMonoisotopicPeakOffset : 0;
            //double maximumMonoisotopicPeakOffset = dbOptions.precursorMonoisotopicPeakCorrection ? dbOptions.maximumPrecursorMonoisotopicPeakOffset : 0;
            foreach (Peptide peptide in ProteinSearcher.ProteinDigest(options, proteins, allowSNP))
            {
                //int firstIndex = AllQueries.BinarySearch(MassTolerance.MzFloor(peptide.MonoisotopicMass, options.precursorMassTolerance));
                //if (firstIndex >= 0 && firstIndex < AllQueries.Count)
                //    yield return new Tuple<Peptide, int>(peptide, firstIndex);

                //foreach (Peptide peptide in ProteinSearcher.ProteinDigestNoEnzyme(dbOptions, proteins, AllQueries))
                //if (!TargetPeptides.Contains(peptide.BaseSequence))
                //{
                foreach (Peptide modPeptide in peptide.GetVariablyModifiedPeptides(options.variableModifications, options.maximumVariableModificationIsoforms))
                {
                    modPeptide.SetFixedModifications(options.fixedModifications);
                    int firstIndex = AllQueries.BinarySearch(MassTolerance.MzFloor(modPeptide.MonoisotopicMass, options.precursorMassTolerance));
                    if (firstIndex >= 0 && firstIndex < AllQueries.Count)
                    {
                        yield return(new Tuple <Peptide, int>(modPeptide, firstIndex));
                    }
                }

                //TODO check if this favors targets over decoys since proteins are sorted target->deco
                //    TargetPeptides.Add(peptide.BaseSequence);
                //}
            }
        }
Пример #3
0
        public void Export(double fdr, string keyword = "", bool onlyPrecursors = false)
        {
            dbOptions.ConSole.WriteLine("Exporting at " + (fdr * 100) + "% FDR (Decoy/Target)...");

            List <Precursor> prec = null;

            if (precursors != null)
            {
                if (matchedPrecursors == null)
                {
                    this.matchedPrecursors = new Precursors();
                    foreach (Precursor precursor in precursors)
                    {
                        if (precursor.psms.Count > 0)
                        {
                            matchedPrecursors.Add(precursor);
                        }
                    }
                }/*
                  * List<Precursor> prec = FDR.PrecursorsV2(precursors, fdr, 1);
                  * Sol.CONSOLE.OutputLine(">   " + prec.Count + " Precursors");
                  * MSSearcher.Export(dbOptions.outputFolder + keyword + "precursors.csv", prec);//*/
                 /*
                  * prec = Optimizer.PrecursorOptimizer(matchedPrecursors, fdr);
                  * Sol.CONSOLE.OutputLine(">   " + prec.Count + " Optimized Precursors");
                  * MSSearcher.Export(dbOptions.outputFolder + keyword + "Optimized_precursors.csv", prec);//*/

                prec = matchedPrecursors.ComputeAtFDR(fdr);
                dbOptions.ConSole.WriteLine(">   " + prec.Count + " Uptimized V5 Precursors");
                MSSearcher.Export(dbOptions.OutputFolder + keyword + "UptimizedV5_precursors.csv", prec);
            }

            if (!onlyPrecursors)
            {
                if (queries != null)
                {
                    List <Query> qs = queries.ComputeAtFDR(fdr);
                    dbOptions.ConSole.WriteLine(">   " + qs.Count + " PSMs (Top 10)");
                    MSSearcher.Export(dbOptions.OutputFolder + keyword + "queries.csv", qs);
                }
                if (clusters != null)
                {
                    //List<PeptideSpectrumMatch> psms = FDR.PSMs(clusters, fdr, 1, 10);
                    //dbOptions.ConSole.WriteLine(">   " + psms.Count + " PSMs (Top 10)");
                    //MSSearcher.Export(dbOptions.outputFolder + keyword + "psms_Top10.csv", psms);

                    //psms = FDR.PSMs(clusters, fdr, 1, 1);
                    //dbOptions.ConSole.WriteLine(">   " + psms.Count + " PSMs");
                    //MSSearcher.Export(dbOptions.outputFolder + keyword + "psms_Best.csv", psms);
                }

                if (peptides != null)
                {
                    List <PeptideMatch> pep = peptideSequences.ComputeAtFDR(fdr);
                    dbOptions.ConSole.WriteLine(">   " + pep.Count + " Peptides Sequences (Version 5)");
                    PeptideSearcher.Export(dbOptions.OutputFolder + keyword + "peptideSequencesV5_.csv", pep);

                    PeptideSearcher sr   = new PeptideSearcher(dbOptions);
                    PeptideMatches  seqs = sr.Search(clusters, prec, false);
                    pep = seqs.ComputeAtFDR(fdr);
                    dbOptions.ConSole.WriteLine(">   " + pep.Count + " Peptides Sequences (Version 5b)");
                    PeptideSearcher.Export(dbOptions.OutputFolder + keyword + "peptideSequencesV5b_PrecursorFDRed.csv", pep);

                    pep = peptides.ComputeAtFDR(fdr);
                    dbOptions.ConSole.WriteLine(">   " + pep.Count + " Peptides (Version 5)");
                    PeptideSearcher.Export(dbOptions.OutputFolder + keyword + "peptidesV5_.csv", pep);
                }
                if (proteins != null)
                {
                    List <ProteinGroupMatch> prots = proteins.ComputeAtFDR(fdr);
                    dbOptions.ConSole.WriteLine(">   " + prots.Count + " Proteins");
                    ProteinSearcher.Export(dbOptions.OutputFolder + keyword + "proteins_.csv", prots);
                }
            }
        }
Пример #4
0
        }//*/

        /// <summary>
        /// Latest version of the search routine. Associates spectrum to digested peptide sequences,
        /// aligns precursors and fragments, clusters common precursors accross samples, create the list of detected
        /// peptide and protein sequences.
        /// TODO Align maps together (retention time)
        /// </summary>
        /// <param name="queries"></param>
        /// <returns></returns>
        public Result SearchVersionAugust2013(Queries queries, bool optimize)
        {
            Result result = new Result();

            result.queries   = queries;
            result.dbOptions = dbOptions;
            result.samples   = Project;

            DBSearcher dbSearcher = new DBSearcher(dbOptions);
            Digestion  ps         = new Digestion(dbOptions);

            if (dbOptions.NoEnzymeSearch)
            {
                result.SetPrecursors(dbSearcher.Search(queries, ps.DigestProteomeOnTheFlyNoEnzyme(AllProteins, queries)));
            }
            else
            {
                result.SetPrecursors(dbSearcher.Search(queries, ps.DigestProteomeOnTheFly(AllProteins, false, queries)));
            }
            dbOptions.ConSole.WriteLine(result.precursors.Count + " precursors matched !");

            PeptideSpectrumMatches allPSMs = new PeptideSpectrumMatches();

            foreach (Precursor precursor in result.precursors)
            {
                foreach (PeptideSpectrumMatch psm in precursor.psms_AllPossibilities)
                {
                    allPSMs.Add(psm);
                }
            }

            //Add all psm possibilities to psms list
            foreach (PeptideSpectrumMatch psm in allPSMs)
            {
                psm.Query.precursor.psms.Add(psm);
            }

            long nbTargets = result.SetPrecursors(result.precursors);

            dbOptions.ConSole.WriteLine("Targets before Optimizing Score Ratios : " + nbTargets + " [" + result.matchedPrecursors.Count + "]");

            if (optimize)
            {
                //allPSMs.OptimizePSMScoreRatios(dbOptions, dbOptions.PSMFalseDiscoveryRate, result);
                //result.matchedPrecursors.OptimizePSMScoreRatios(dbOptions, dbOptions.PSMFalseDiscoveryRate, result);
                nbTargets = result.SetPrecursors(result.precursors);
                dbOptions.ConSole.WriteLine("Targets : " + nbTargets + " [" + result.matchedPrecursors.Count + "]");
                //*/
                //TODO Improve alignment results

                /*
                 * Align.AlignPrecursorsByDiff(result, allPSMs);
                 * nbTargets = result.SetPrecursors(result.precursors);
                 * dbOptions.ConSole.WriteLine("Targets after precursor alignment : " + nbTargets + " [" + result.matchedPrecursors.Count + "]");
                 *
                 * Align.AlignProductsByDiff(result, allPSMs);
                 * nbTargets = result.SetPrecursors(result.precursors);
                 * dbOptions.ConSole.WriteLine("Targets after fragment alignment : " + nbTargets + " [" + result.matchedPrecursors.Count + "]");
                 * //*/
                /*
                 * dbOptions.precursorMassTolerance.Value = Align.CropPrecursors(result, allPSMs);
                 * nbTargets = result.SetPrecursors(result.precursors);
                 * dbOptions.ConSole.WriteLine("Targets after croping precursors : " + nbTargets + " [" + result.matchedPrecursors.Count + "]");
                 *
                 * dbOptions.productMassTolerance.Value = Align.CropProducts(result, allPSMs);
                 * nbTargets = result.SetPrecursors(result.precursors);
                 * dbOptions.ConSole.WriteLine("Targets after croping fragments : " + nbTargets + " [" + result.matchedPrecursors.Count + "]");
                 * //*/
            }
            //*/
            allPSMs = null;
            long       bestTargets = nbTargets;
            MSSearcher msSearcher  = new MSSearcher(dbOptions, Project);

            msSearcher.CumulPsm(result.matchedPrecursors);//TODO Check if its still needed

            //Step 1 : Cluster psms together based on precursor feature //TODO Implement ProteoProfile Scoring based clustering
            //Group in clusters
            result.clusters = msSearcher.Search(result.matchedPrecursors, true);
            //Todo Align retention times
            //Todo redo clusterization, based on retention time aligned maps

            //Step 2 : Regroup based on peptide sequence (Morpheus code)
            PeptideSearcher pepSearcher = new PeptideSearcher(dbOptions);

            result.peptides         = pepSearcher.Search(result.clusters, result.matchedPrecursors, true);
            result.peptideSequences = pepSearcher.Search(result.clusters, result.matchedPrecursors, false);

            //Step 3 : Regroup based on protein sequences (Morpheus code)
            ProteinSearcher protSearcher = new ProteinSearcher(dbOptions);

            result.proteins = protSearcher.SearchLatest(result.peptideSequences);//, dbSearcher.DicOfProteins);

            //long lastNbTarget = nbTargets;
            //do
            //{
            //    lastNbTarget = nbTargets;
            //UpdatePsmScores(result.proteins);
            //    nbTargets = result.SetPrecursors(result.precursors);
            //} while (lastNbTarget < nbTargets);//*/

            result.peptides         = pepSearcher.Search(result.clusters, result.matchedPrecursors, true);
            result.peptideSequences = pepSearcher.Search(result.clusters, result.matchedPrecursors, false);

            if (optimize)
            {
                dbOptions.ConSole.WriteLine("Targets before second Optimization of Score Ratios : " + nbTargets + " [" + result.matchedPrecursors.Count + "]");
                //result.matchedPrecursors.OptimizePSMScoreRatios(dbOptions, dbOptions.PSMFalseDiscoveryRate, result);
                nbTargets = result.SetPrecursors(result.precursors);
                dbOptions.ConSole.WriteLine("Targets after ReOptimizing PSM Score Ratios : " + nbTargets + " [" + result.matchedPrecursors.Count + "]");//*/
            }

            //Step 5 : Compute the new number of Targets
            nbTargets = result.SetPrecursors(result.precursors);
            if (nbTargets < bestTargets)
            {
                dbOptions.ConSole.WriteLine("FAILED to improve PSMs while adding protein and peptide information");
            }

            dbOptions.ConSole.WriteLine(result.matchedPrecursors.Count + " precursors remaining after ProPheus Search!");
            return(result);
        }//*/