}//*/

/// <summary>
/// August 2013 revision of the search routine. Matches the queries against peptides digested
/// on the fly from <c>AllProteins</c>, registers every candidate PSM on the precursor of its
/// query, groups the matched precursors into clusters and then builds the peptide,
/// peptide-sequence and protein lists of the result.
/// TODO: Align maps together (retention time).
/// </summary>
/// <param name="queries">Queries to search; also stored on the returned result.</param>
/// <param name="optimize">
/// When true, the target count is recomputed and logged at the two optimization checkpoints.
/// The score-ratio optimization calls themselves are currently disabled.
/// </param>
/// <returns>The populated <see cref="Result"/>.</returns>
public Result SearchVersionAugust2013(Queries queries, bool optimize)
{
    Result searchResult = new Result
    {
        queries = queries,
        dbOptions = dbOptions,
        samples = Project
    };

    DBSearcher databaseSearcher = new DBSearcher(dbOptions);
    Digestion digestion = new Digestion(dbOptions);
    if (!dbOptions.NoEnzymeSearch)
    {
        searchResult.SetPrecursors(
            databaseSearcher.Search(queries, digestion.DigestProteomeOnTheFly(AllProteins, false, queries)));
    }
    else
    {
        searchResult.SetPrecursors(
            databaseSearcher.Search(queries, digestion.DigestProteomeOnTheFlyNoEnzyme(AllProteins, queries)));
    }
    dbOptions.ConSole.WriteLine(searchResult.precursors.Count + " precursors matched !");

    // Collect every candidate PSM first, then attach each one to the precursor of its own query.
    // The two phases are kept separate so no psms list is modified while precursors are enumerated.
    PeptideSpectrumMatches candidates = new PeptideSpectrumMatches();
    foreach (Precursor matched in searchResult.precursors)
    {
        foreach (PeptideSpectrumMatch candidate in matched.psms_AllPossibilities)
        {
            candidates.Add(candidate);
        }
    }
    foreach (PeptideSpectrumMatch candidate in candidates)
    {
        candidate.Query.precursor.psms.Add(candidate);
    }

    long nbTargets = searchResult.SetPrecursors(searchResult.precursors);
    dbOptions.ConSole.WriteLine(string.Format(
        "Targets before Optimizing Score Ratios : {0} [{1}]", nbTargets, searchResult.matchedPrecursors.Count));

    if (optimize)
    {
        // Disabled: OptimizePSMScoreRatios on the PSM list / matched precursors.
        nbTargets = searchResult.SetPrecursors(searchResult.precursors);
        dbOptions.ConSole.WriteLine(string.Format(
            "Targets : {0} [{1}]", nbTargets, searchResult.matchedPrecursors.Count));

        // TODO Improve alignment results.
        // Disabled: Align.AlignPrecursorsByDiff / Align.AlignProductsByDiff,
        // and Align.CropPrecursors / Align.CropProducts (mass tolerance cropping).
    }

    // Drop the reference to the flat PSM list; it is not used past this point.
    candidates = null;
    long bestTargets = nbTargets;

    MSSearcher msSearcher = new MSSearcher(dbOptions, Project);
    msSearcher.CumulPsm(searchResult.matchedPrecursors); // TODO Check if it is still needed

    // Step 1 : Cluster PSMs together based on precursor feature.
    // TODO Implement ProteoProfile scoring based clustering.
    searchResult.clusters = msSearcher.Search(searchResult.matchedPrecursors, true);
    // TODO Align retention times, then redo the clustering on retention-time aligned maps.

    // Step 2 : Regroup based on peptide sequence (Morpheus code).
    PeptideSearcher peptideSearcher = new PeptideSearcher(dbOptions);
    searchResult.peptides = peptideSearcher.Search(searchResult.clusters, searchResult.matchedPrecursors, true);
    searchResult.peptideSequences = peptideSearcher.Search(searchResult.clusters, searchResult.matchedPrecursors, false);

    // Step 3 : Regroup based on protein sequences (Morpheus code).
    ProteinSearcher proteinSearcher = new ProteinSearcher(dbOptions);
    searchResult.proteins = proteinSearcher.SearchLatest(searchResult.peptideSequences);

    // Disabled: iterative UpdatePsmScores loop that repeated while the target count kept growing.
    // The peptide lists are still rebuilt after the protein step.
    searchResult.peptides = peptideSearcher.Search(searchResult.clusters, searchResult.matchedPrecursors, true);
    searchResult.peptideSequences = peptideSearcher.Search(searchResult.clusters, searchResult.matchedPrecursors, false);

    if (optimize)
    {
        dbOptions.ConSole.WriteLine(string.Format(
            "Targets before second Optimization of Score Ratios : {0} [{1}]", nbTargets, searchResult.matchedPrecursors.Count));
        // Disabled: second OptimizePSMScoreRatios pass on the matched precursors.
        nbTargets = searchResult.SetPrecursors(searchResult.precursors);
        dbOptions.ConSole.WriteLine(string.Format(
            "Targets after ReOptimizing PSM Score Ratios : {0} [{1}]", nbTargets, searchResult.matchedPrecursors.Count));
    }

    // Step 5 : Compute the new number of targets and compare with the count taken before clustering.
    nbTargets = searchResult.SetPrecursors(searchResult.precursors);
    if (bestTargets > nbTargets)
    {
        dbOptions.ConSole.WriteLine("FAILED to improve PSMs while adding protein and peptide information");
    }

    dbOptions.ConSole.WriteLine(searchResult.matchedPrecursors.Count + " precursors remaining after ProPheus Search!");
    return searchResult;
}
/// <summary>
/// Writes the CSV exports of this result, each list being filtered through its own
/// <c>ComputeAtFDR</c> call: precursors first, then (unless <paramref name="onlyPrecursors"/>
/// is set) queries, peptide sequences, peptides and protein groups. Every list that is
/// null is skipped.
/// </summary>
/// <param name="fdr">False discovery rate as a fraction (it is logged multiplied by 100, as a percentage).</param>
/// <param name="keyword">Text inserted between the output folder and each file name.</param>
/// <param name="onlyPrecursors">When true, only the precursor export is produced.</param>
public void Export(double fdr, string keyword = "", bool onlyPrecursors = false)
{
    dbOptions.ConSole.WriteLine("Exporting at " + (fdr * 100) + "% FDR (Decoy/Target)...");

    // Every export file name is the output folder, then the keyword, then a fixed suffix.
    string pathPrefix = dbOptions.OutputFolder + keyword;

    List<Precursor> prec = null;
    if (precursors != null)
    {
        if (matchedPrecursors == null)
        {
            // Rebuild the matched list from the precursors that carry at least one PSM.
            this.matchedPrecursors = new Precursors();
            foreach (Precursor precursor in precursors)
            {
                if (precursor.psms.Count > 0)
                {
                    matchedPrecursors.Add(precursor);
                }
            }
        }

        prec = matchedPrecursors.ComputeAtFDR(fdr);
        dbOptions.ConSole.WriteLine("> " + prec.Count + " Uptimized V5 Precursors");
        MSSearcher.Export(pathPrefix + "UptimizedV5_precursors.csv", prec);
    }

    if (onlyPrecursors)
    {
        return;
    }

    if (queries != null)
    {
        List<Query> qs = queries.ComputeAtFDR(fdr);
        dbOptions.ConSole.WriteLine("> " + qs.Count + " PSMs (Top 10)");
        MSSearcher.Export(pathPrefix + "queries.csv", qs);
    }

    // The cluster-based PSM exports (psms_Top10.csv / psms_Best.csv) are disabled.

    if (peptides != null)
    {
        List<PeptideMatch> pep;

        // peptideSequences is a separate member and may be null even when peptides is set;
        // skip its export instead of failing with a NullReferenceException.
        if (peptideSequences != null)
        {
            pep = peptideSequences.ComputeAtFDR(fdr);
            dbOptions.ConSole.WriteLine("> " + pep.Count + " Peptides Sequences (Version 5)");
            PeptideSearcher.Export(pathPrefix + "peptideSequencesV5_.csv", pep);
        }

        // The V5b export is built from the FDR-filtered precursor list, which only exists
        // when precursors were available above; without it there is nothing to search.
        if (prec != null)
        {
            PeptideSearcher sr = new PeptideSearcher(dbOptions);
            PeptideMatches seqs = sr.Search(clusters, prec, false);
            pep = seqs.ComputeAtFDR(fdr);
            dbOptions.ConSole.WriteLine("> " + pep.Count + " Peptides Sequences (Version 5b)");
            PeptideSearcher.Export(pathPrefix + "peptideSequencesV5b_PrecursorFDRed.csv", pep);
        }

        pep = peptides.ComputeAtFDR(fdr);
        dbOptions.ConSole.WriteLine("> " + pep.Count + " Peptides (Version 5)");
        PeptideSearcher.Export(pathPrefix + "peptidesV5_.csv", pep);
    }

    if (proteins != null)
    {
        List<ProteinGroupMatch> prots = proteins.ComputeAtFDR(fdr);
        dbOptions.ConSole.WriteLine("> " + prots.Count + " Proteins");
        ProteinSearcher.Export(pathPrefix + "proteins_.csv", prots);
    }
}
/// <summary>
/// Latest version of the search routine. Matches the queries against peptides digested on the
/// fly from <c>AllProteins</c>, keeps every candidate PSM on its precursor, groups the matched
/// precursors with <c>MSSearcher.Search</c> and builds the peptide and peptide-sequence lists
/// with <c>PeptideSearcher.SearchClusters</c>. This routine does not populate the protein list.
/// </summary>
/// <param name="queries">Queries to search; also stored on the returned result.</param>
/// <param name="optimize">
/// Currently has no effect: every optimization step it used to guard is disabled.
/// Kept so existing callers keep compiling.
/// </param>
/// <param name="runCluster">
/// Forwarded as the second argument of <c>MSSearcher.Search</c>
/// (presumably toggles the actual clustering; confirm against MSSearcher).
/// </param>
/// <returns>The populated <see cref="Result"/>.</returns>
public Result SearchLatestVersion(Queries queries, bool optimize, bool runCluster = false)
{
    Result result = new Result();
    result.queries = queries;
    result.dbOptions = dbOptions;
    result.samples = Project;

    DBSearcher dbSearcher = new DBSearcher(dbOptions);
    Digestion ps = new Digestion(dbOptions);
    if (dbOptions.NoEnzymeSearch)
    {
        result.SetPrecursors(dbSearcher.Search(queries, ps.DigestProteomeOnTheFlyNoEnzyme(AllProteins, queries)));
    }
    else
    {
        result.SetPrecursors(dbSearcher.Search(queries, ps.DigestProteomeOnTheFly(AllProteins, false, queries)));
    }
    dbOptions.ConSole.WriteLine(result.precursors.Count + " precursors matched !");

    // Keep every candidate PSM on its precursor. The score-threshold filtering that used to
    // follow (ComputeScoreThreshold / KeepPSM) is disabled because it was unstable.
    foreach (Precursor precursor in result.precursors)
    {
        foreach (PeptideSpectrumMatch psm in precursor.psms_AllPossibilities)
        {
            precursor.psms.Add(psm);
        }
    }

    long nbTargets = result.SetPrecursors(result.precursors);
    // The message text is kept as-is for log compatibility, although no threshold optimization runs.
    dbOptions.ConSole.WriteLine("Targets after Optimizing PSM Thresholds : " + nbTargets + " [" + result.matchedPrecursors.Count + "]");

    MSSearcher msSearcher = new MSSearcher(dbOptions, Project);
    msSearcher.CumulPsm(result.matchedPrecursors); // TODO Check if it is still needed

    // Step 1 : Cluster PSMs together based on precursor feature.
    // TODO Implement ProteoProfile scoring based clustering.
    result.clusters = msSearcher.Search(result.matchedPrecursors, runCluster);
    // TODO Align retention times, then redo the clustering on retention-time aligned maps.

    // Step 2 : Regroup based on peptide sequence.
    // The optimize-only steps (UpdatePsmScores, OptimizePSMScoreRatios) are disabled.
    PeptideSearcher pepSearcher = new PeptideSearcher(dbOptions);
    result.peptides = pepSearcher.SearchClusters(result.clusters, result.matchedPrecursors, true);
    result.peptideSequences = pepSearcher.SearchClusters(result.clusters, result.matchedPrecursors, false);
    dbOptions.ConSole.WriteLine("Found peptides from Searchclusters routine : " + result.peptides.Count);

    nbTargets = result.SetPrecursors(result.precursors);
    dbOptions.ConSole.WriteLine("Targets after ReRanking peptides : " + nbTargets + " [" + result.matchedPrecursors.Count + "]");

    dbOptions.ConSole.WriteLine(result.matchedPrecursors.Count + " precursors remaining after ProPheus Search!");
    return result;
}