/// <summary>
/// Lazily digests <paramref name="proteins"/> and yields every variably modified
/// peptide (with fixed modifications applied) for which
/// <c>AllQueries.BinarySearch</c> returns an index inside <paramref name="AllQueries"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the visible body performs the same steps as
/// <c>DigestProteomeOnTheFly</c>; confirm whether both entry points are still needed.
/// </remarks>
/// <param name="proteins">Proteins handed to <c>ProteinSearcher.ProteinDigest</c>.</param>
/// <param name="allowSNP">Forwarded unchanged to <c>ProteinSearcher.ProteinDigest</c>.</param>
/// <param name="AllQueries">Query collection probed with <c>BinarySearch</c>.</param>
/// <returns>
/// A deferred sequence of (modified peptide, index returned by <c>BinarySearch</c>) pairs.
/// </returns>
public IEnumerable<Tuple<Peptide, int>> DigestProteomeOnTheFlyFast(List<Protein> proteins, bool allowSNP, Queries AllQueries)
{
    foreach (Peptide digested in ProteinSearcher.ProteinDigest(options, proteins, allowSNP))
    {
        foreach (Peptide isoform in digested.GetVariablyModifiedPeptides(options.variableModifications, options.maximumVariableModificationIsoforms))
        {
            // Fixed modifications must be applied before the mass is read below.
            isoform.SetFixedModifications(options.fixedModifications);

            // MzFloor of the isoform mass is the value looked up in the queries
            // (presumably the lower edge of the precursor tolerance window).
            var searchMass = MassTolerance.MzFloor(isoform.MonoisotopicMass, options.precursorMassTolerance);
            int queryIndex = AllQueries.BinarySearch(searchMass);

            // Skip isoforms whose index falls outside the query collection.
            if (queryIndex < 0 || queryIndex >= AllQueries.Count)
            {
                continue;
            }

            yield return new Tuple<Peptide, int>(isoform, queryIndex);
        }
        //TODO check if this favors targets over decoys since proteins are sorted target->decoy
    }
}
/// <summary>
/// Streams the digestion of <paramref name="proteins"/>: each digested peptide is
/// expanded into its variably modified forms, fixed modifications are applied, and
/// the form is yielded together with the index that <c>AllQueries.BinarySearch</c>
/// returns for it, provided that index lies within <paramref name="AllQueries"/>.
/// </summary>
/// <remarks>
/// Peptides are produced on the fly instead of being stored. According to a note
/// left in the original source, keeping all digested peptide sequences in a
/// dictionary did not fit in memory (about 360 GB).
/// </remarks>
/// <param name="proteins">Proteins handed to <c>ProteinSearcher.ProteinDigest</c>.</param>
/// <param name="allowSNP">Forwarded unchanged to <c>ProteinSearcher.ProteinDigest</c>.</param>
/// <param name="AllQueries">Query collection probed with <c>BinarySearch</c>.</param>
/// <returns>
/// A deferred sequence of (modified peptide, index returned by <c>BinarySearch</c>) pairs.
/// </returns>
public IEnumerable<Tuple<Peptide, int>> DigestProteomeOnTheFly(List<Protein> proteins, bool allowSNP, Queries AllQueries)
{
    foreach (Peptide unmodified in ProteinSearcher.ProteinDigest(options, proteins, allowSNP))
    {
        foreach (Peptide candidate in unmodified.GetVariablyModifiedPeptides(options.variableModifications, options.maximumVariableModificationIsoforms))
        {
            // The mass used for the lookup is read after fixed modifications are set.
            candidate.SetFixedModifications(options.fixedModifications);

            int position = AllQueries.BinarySearch(
                MassTolerance.MzFloor(candidate.MonoisotopicMass, options.precursorMassTolerance));

            // Only indices that address an existing query are reported.
            bool withinQueries = position >= 0 && position < AllQueries.Count;
            if (withinQueries)
            {
                yield return new Tuple<Peptide, int>(candidate, position);
            }
        }
        //TODO check if this favors targets over decoys since proteins are sorted target->decoy
    }
}
/// <summary>
/// Maps all peptide sequences to potential precursors and spectrum.
/// </summary>
/// <remarks>
/// Peptide hits are processed in parallel. For each hit, queries are walked from the
/// hinted index upward while their precursor mass stays at or below the hit's upper
/// mass bound, and <c>ComputePSMs</c> is called for each of them (plus once more for
/// the reversed peptide when <c>options.DecoyFusion</c> is set). Afterwards each
/// query's PSMs are appended to its precursor's <c>psms_AllPossibilities</c>.
/// </remarks>
/// <param name="queries">Queries to match; accessed by index and compared through <c>precursor.Mass</c>.</param>
/// <param name="fittingPeptides">
/// Pairs of (peptide, index of the first query to consider for that peptide).
/// </param>
/// <returns><c>queries.Precursors</c>.</returns>
public Precursors Search(Queries queries, IEnumerable<Tuple<Peptide, int>> fittingPeptides)
{
    queries.dbOptions.ConSole.WriteLine("Mapping " + queries.Count + " queries to the digested proteome ... ");

    // Updated from several worker threads below; only ever modified through Interlocked.
    long nbQueryConsidered = 0;

    Parallel.ForEach<Tuple<Peptide, int>>(fittingPeptides, (Tuple<Peptide, int> hit) =>
    {
        int indexPrecursor = hit.Item2;
        double maximumMass = MassTolerance.MzTop(hit.Item1.MonoisotopicMass, options.precursorMassTolerance);
        double minimumMass = MassTolerance.MzFloor(hit.Item1.MonoisotopicMass, options.precursorMassTolerance);

        if (indexPrecursor < queries.Count && queries[indexPrecursor].precursor.Mass >= minimumMass)
        {
            while (indexPrecursor < queries.Count && queries[indexPrecursor].precursor.Mass <= maximumMass)
            {
                // Serialize work on this query's PSM list; other hits may target the same query.
                lock (queries[indexPrecursor].psms)
                {
                    //Target (or decoy with enzyme digests)
                    ComputePSMs(queries[indexPrecursor], hit.Item1);

                    //Decoy if NoEnzyme digest
                    if (options.DecoyFusion)
                    {
                        ComputePSMs(queries[indexPrecursor], hit.Item1.Reverse());
                    }
                    indexPrecursor++;
                }
            }

            // The counter is shared by all parallel iterations: a plain "+=" on a long
            // is a non-atomic read-modify-write and would lose updates.
            System.Threading.Interlocked.Add(ref nbQueryConsidered, indexPrecursor - hit.Item2);
        }
        else
        {
            // The hinted index is past the end of the queries, or the query at that
            // index is lighter than the hit's minimum mass.
            options.ConSole.WriteLine("WTF####");
        }
    });

    //Push PSMs to Precursor. Entries are appended as-is (no merge of PSMs sharing a peptide).
    foreach (Query query in queries)
    {
        query.precursor.psms_AllPossibilities.AddRange(query.psms);
    }

    int nbAssignedPrecursor = 0;
    foreach (Precursor precursor in queries.Precursors)
    {
        if (precursor.psms_AllPossibilities.Count > 0)
        {
            nbAssignedPrecursor++;
        }
    }

    //TODO Check the impact of this approach: no FDR-based filtering of PSMs is applied here
    //     (an earlier, disabled version kept only PSMs passing ComputeAtFDR(0.05)).

    int nbAssignedQuery = 0;
    foreach (Query query in queries)
    {
        if (query.psms.Count > 0)
        {
            nbAssignedQuery++;
        }
    }

    options.ConSole.WriteLine(nbAssignedQuery + " queries matched [" + nbAssignedPrecursor + " precursors] out of " + nbQueryConsidered + " psm computed");
    return queries.Precursors;
}