/// <summary>
/// Lazily digests <paramref name="proteins"/> and yields every variably modified peptide form
/// whose query lookup returns an index inside <paramref name="AllQueries"/>.
/// </summary>
/// <param name="proteins">Proteins handed to <c>ProteinSearcher.ProteinDigest</c>.</param>
/// <param name="allowSNP">Forwarded unchanged to <c>ProteinSearcher.ProteinDigest</c>.</param>
/// <param name="AllQueries">Query collection searched with the tolerance-floored monoisotopic mass of each peptide form.</param>
/// <returns>Pairs of (modified peptide, index returned by <c>AllQueries.BinarySearch</c>), produced on demand.</returns>
public IEnumerable<Tuple<Peptide, int>> DigestProteomeOnTheFlyFast(List<Protein> proteins, bool allowSNP, Queries AllQueries)
{
    foreach (Peptide digested in ProteinSearcher.ProteinDigest(options, proteins, allowSNP))
    {
        var isoforms = digested.GetVariablyModifiedPeptides(options.variableModifications, options.maximumVariableModificationIsoforms);
        foreach (Peptide isoform in isoforms)
        {
            // Fixed modifications are applied before the monoisotopic mass is read.
            isoform.SetFixedModifications(options.fixedModifications);

            var flooredMass = MassTolerance.MzFloor(isoform.MonoisotopicMass, options.precursorMassTolerance);
            int queryIndex = AllQueries.BinarySearch(flooredMass);

            // Skip peptide forms whose lookup falls outside the query list.
            if (queryIndex < 0 || queryIndex >= AllQueries.Count)
            {
                continue;
            }

            yield return new Tuple<Peptide, int>(isoform, queryIndex);
        }
        // TODO check if this favors targets over decoys since proteins are sorted target->decoy
    }
}
/// <summary>
/// Streams the digested proteome: for each peptide produced by <c>ProteinSearcher.ProteinDigest</c>,
/// enumerates its variably modified forms, applies the fixed modifications, and yields the forms
/// for which <c>AllQueries.BinarySearch</c> returns an index within the query list.
/// </summary>
/// <remarks>
/// NOTE(review): the active code is currently identical to <c>DigestProteomeOnTheFlyFast</c>.
/// </remarks>
/// <param name="proteins">Proteins to digest.</param>
/// <param name="allowSNP">Passed through to <c>ProteinSearcher.ProteinDigest</c>.</param>
/// <param name="AllQueries">Queries searched by the floored monoisotopic mass of each modified peptide.</param>
/// <returns>A lazily evaluated sequence of (modified peptide, first query index) tuples.</returns>
public IEnumerable<Tuple<Peptide, int>> DigestProteomeOnTheFly(List<Protein> proteins, bool allowSNP, Queries AllQueries)
{
    // Peptides are streamed instead of being stored in a dictionary: per the original author's
    // note, keeping all digested peptides did not fit in memory (about 360 GB).
    foreach (Peptide basePeptide in ProteinSearcher.ProteinDigest(options, proteins, allowSNP))
    {
        foreach (Peptide modified in basePeptide.GetVariablyModifiedPeptides(
                     options.variableModifications,
                     options.maximumVariableModificationIsoforms))
        {
            modified.SetFixedModifications(options.fixedModifications);

            int index = AllQueries.BinarySearch(
                MassTolerance.MzFloor(modified.MonoisotopicMass, options.precursorMassTolerance));

            bool insideQueryList = index >= 0 && index < AllQueries.Count;
            if (insideQueryList)
            {
                yield return Tuple.Create(modified, index);
            }
        }
        // TODO check if this favors targets over decoys since proteins are sorted target->decoy
    }
}
/// <summary>
/// Writes each populated result level (precursors, queries, peptides, proteins) to a CSV file
/// named <c>dbOptions.OutputFolder + keyword + &lt;fixed file name&gt;</c>, each computed at the given FDR,
/// and logs the number of exported entries to <c>dbOptions.ConSole</c>.
/// </summary>
/// <param name="fdr">Value passed to every <c>ComputeAtFDR</c> call; logged multiplied by 100 as a percentage.</param>
/// <param name="keyword">Text inserted between the output folder and each fixed file name.</param>
/// <param name="onlyPrecursors">When true, nothing after the precursor export is written.</param>
public void Export(double fdr, string keyword = "", bool onlyPrecursors = false)
{
    dbOptions.ConSole.WriteLine("Exporting at " + (fdr * 100) + "% FDR (Decoy/Target)...");
    string filePrefix = dbOptions.OutputFolder + keyword;

    List<Precursor> prec = null;
    if (precursors != null)
    {
        if (matchedPrecursors == null)
        {
            // Build the matched list on first use: precursors holding at least one PSM.
            this.matchedPrecursors = new Precursors();
            foreach (Precursor precursor in precursors)
            {
                if (precursor.psms.Count > 0)
                {
                    matchedPrecursors.Add(precursor);
                }
            }
        }

        prec = matchedPrecursors.ComputeAtFDR(fdr);
        dbOptions.ConSole.WriteLine("> " + prec.Count + " Uptimized V5 Precursors");
        MSSearcher.Export(filePrefix + "UptimizedV5_precursors.csv", prec);
    }

    if (onlyPrecursors)
    {
        return;
    }

    if (queries != null)
    {
        List<Query> qs = queries.ComputeAtFDR(fdr);
        dbOptions.ConSole.WriteLine("> " + qs.Count + " PSMs (Top 10)");
        MSSearcher.Export(filePrefix + "queries.csv", qs);
    }

    // Cluster-level PSM exports (psms_Top10.csv / psms_Best.csv) are disabled; nothing is written for clusters.

    if (peptides != null)
    {
        List<PeptideMatch> pep;

        // Guard the field that is actually dereferenced: the enclosing check only covers
        // 'peptides', so an unset 'peptideSequences' used to throw a NullReferenceException here.
        if (peptideSequences != null)
        {
            pep = peptideSequences.ComputeAtFDR(fdr);
            dbOptions.ConSole.WriteLine("> " + pep.Count + " Peptides Sequences (Version 5)");
            PeptideSearcher.Export(filePrefix + "peptideSequencesV5_.csv", pep);
        }

        // NOTE(review): 'prec' is null here when 'precursors' was null, and 'clusters' is not
        // checked; how PeptideSearcher.Search handles null arguments is not visible - confirm.
        PeptideSearcher sr = new PeptideSearcher(dbOptions);
        PeptideMatches seqs = sr.Search(clusters, prec, false);
        pep = seqs.ComputeAtFDR(fdr);
        dbOptions.ConSole.WriteLine("> " + pep.Count + " Peptides Sequences (Version 5b)");
        PeptideSearcher.Export(filePrefix + "peptideSequencesV5b_PrecursorFDRed.csv", pep);

        pep = peptides.ComputeAtFDR(fdr);
        dbOptions.ConSole.WriteLine("> " + pep.Count + " Peptides (Version 5)");
        PeptideSearcher.Export(filePrefix + "peptidesV5_.csv", pep);
    }

    if (proteins != null)
    {
        List<ProteinGroupMatch> prots = proteins.ComputeAtFDR(fdr);
        dbOptions.ConSole.WriteLine("> " + prots.Count + " Proteins");
        ProteinSearcher.Export(filePrefix + "proteins_.csv", prots);
    }
}
}//*/
/// <summary>
/// Latest version of the search routine. Runs the database search of <paramref name="queries"/>
/// against the proteome digested on the fly, attaches every candidate PSM to its precursor,
/// clusters the matched precursors, then regroups them into peptides, peptide sequences and proteins.
/// The precursor/fragment alignment and cropping steps are currently commented out.
/// TODO Align maps together (retention time)
/// </summary>
/// <param name="queries">Queries to search; also stored on the returned result.</param>
/// <param name="optimize">
/// When true, runs the extra <c>SetPrecursors</c> refresh and logging passes that surround the
/// (currently disabled) PSM score-ratio optimisation.
/// </param>
/// <returns>The <c>Result</c> populated with precursors, clusters, peptides, peptide sequences and proteins.</returns>
public Result SearchVersionAugust2013(Queries queries, bool optimize)
{
    Result outcome = new Result
    {
        queries = queries,
        dbOptions = dbOptions,
        samples = Project
    };

    DBSearcher databaseSearcher = new DBSearcher(dbOptions);
    Digestion digestion = new Digestion(dbOptions);
    if (dbOptions.NoEnzymeSearch)
    {
        outcome.SetPrecursors(databaseSearcher.Search(queries, digestion.DigestProteomeOnTheFlyNoEnzyme(AllProteins, queries)));
    }
    else
    {
        outcome.SetPrecursors(databaseSearcher.Search(queries, digestion.DigestProteomeOnTheFly(AllProteins, false, queries)));
    }
    dbOptions.ConSole.WriteLine(outcome.precursors.Count + " precursors matched !");

    // Collect every PSM possibility first, then attach each one to the precursor of its query.
    PeptideSpectrumMatches collectedPsms = new PeptideSpectrumMatches();
    foreach (Precursor precursor in outcome.precursors)
    {
        foreach (PeptideSpectrumMatch candidate in precursor.psms_AllPossibilities)
        {
            collectedPsms.Add(candidate);
        }
    }
    foreach (PeptideSpectrumMatch candidate in collectedPsms)
    {
        candidate.Query.precursor.psms.Add(candidate);
    }

    long targetCount = outcome.SetPrecursors(outcome.precursors);
    dbOptions.ConSole.WriteLine("Targets before Optimizing Score Ratios : " + targetCount + " [" + outcome.matchedPrecursors.Count + "]");

    if (optimize)
    {
        // The score-ratio optimisation itself is disabled; only the refresh and log remain.
        targetCount = outcome.SetPrecursors(outcome.precursors);
        dbOptions.ConSole.WriteLine("Targets : " + targetCount + " [" + outcome.matchedPrecursors.Count + "]");

        // TODO Improve alignment results (precursor/fragment alignment and cropping are disabled)
    }

    // The collected list is not used past this point; clear the reference.
    collectedPsms = null;
    long baselineTargets = targetCount;

    MSSearcher clusterSearcher = new MSSearcher(dbOptions, Project);
    clusterSearcher.CumulPsm(outcome.matchedPrecursors); // TODO Check if it is still needed

    // Step 1 : Cluster PSMs together based on precursor feature
    // TODO Implement ProteoProfile scoring based clustering
    outcome.clusters = clusterSearcher.Search(outcome.matchedPrecursors, true);
    // TODO Align retention times, then redo the clustering on the aligned maps

    // Step 2 : Regroup based on peptide sequence (Morpheus code)
    PeptideSearcher peptideSearcher = new PeptideSearcher(dbOptions);
    outcome.peptides = peptideSearcher.Search(outcome.clusters, outcome.matchedPrecursors, true);
    outcome.peptideSequences = peptideSearcher.Search(outcome.clusters, outcome.matchedPrecursors, false);

    // Step 3 : Regroup based on protein sequences (Morpheus code)
    ProteinSearcher proteinSearcher = new ProteinSearcher(dbOptions);
    outcome.proteins = proteinSearcher.SearchLatest(outcome.peptideSequences);

    // The peptide groupings are computed a second time after the protein search.
    outcome.peptides = peptideSearcher.Search(outcome.clusters, outcome.matchedPrecursors, true);
    outcome.peptideSequences = peptideSearcher.Search(outcome.clusters, outcome.matchedPrecursors, false);

    if (optimize)
    {
        dbOptions.ConSole.WriteLine("Targets before second Optimization of Score Ratios : " + targetCount + " [" + outcome.matchedPrecursors.Count + "]");
        // The second score-ratio optimisation is disabled as well; only the refresh and log remain.
        targetCount = outcome.SetPrecursors(outcome.precursors);
        dbOptions.ConSole.WriteLine("Targets after ReOptimizing PSM Score Ratios : " + targetCount + " [" + outcome.matchedPrecursors.Count + "]");
    }

    // Final step : Compute the new number of targets and compare it with the pre-clustering count
    targetCount = outcome.SetPrecursors(outcome.precursors);
    if (targetCount < baselineTargets)
    {
        dbOptions.ConSole.WriteLine("FAILED to improve PSMs while adding protein and peptide information");
    }

    dbOptions.ConSole.WriteLine(outcome.matchedPrecursors.Count + " precursors remaining after ProPheus Search!");
    return outcome;
}