/// <summary>
/// Lazily digests <paramref name="proteins"/> and yields every variably modified peptide form
/// together with the index that the binary search over <paramref name="AllQueries"/> returns
/// for the lower bound of that form's precursor mass window.
/// </summary>
/// <param name="proteins">Proteins handed to <c>ProteinSearcher.ProteinDigest</c>.</param>
/// <param name="allowSNP">Forwarded unchanged to <c>ProteinSearcher.ProteinDigest</c>.</param>
/// <param name="AllQueries">Query collection searched for the first candidate index.</param>
/// <returns>
/// Pairs of (modified peptide, first query index). Forms whose index is outside
/// <c>[0, AllQueries.Count)</c> are skipped.
/// </returns>
public IEnumerable<Tuple<Peptide, int>> DigestProteomeOnTheFlyFast(List<Protein> proteins, bool allowSNP, Queries AllQueries)
{
    foreach (Peptide digested in ProteinSearcher.ProteinDigest(options, proteins, allowSNP))
    {
        foreach (Peptide candidate in digested.GetVariablyModifiedPeptides(options.variableModifications, options.maximumVariableModificationIsoforms))
        {
            // Fixed modifications are applied to each variable form before its mass is read.
            candidate.SetFixedModifications(options.fixedModifications);

            double windowFloor = MassTolerance.MzFloor(candidate.MonoisotopicMass, options.precursorMassTolerance);
            int firstIndex = AllQueries.BinarySearch(windowFloor);
            if (firstIndex < 0 || firstIndex >= AllQueries.Count)
            {
                continue;
            }

            yield return new Tuple<Peptide, int>(candidate, firstIndex);
        }
        //TODO check if this favors targets over decoys since proteins are sorted target->decoy
    }
}
//TODO (marker carried over from the original, which does not say what remains to be done)
/// <summary>
/// Yields the indices of the entries in <c>Peaks</c> whose <c>MZ</c> lies within
/// <paramref name="productMassTolerance"/> of <paramref name="theoMz"/>.
/// </summary>
/// <param name="theoMz">Theoretical m/z at the centre of the window.</param>
/// <param name="productMassTolerance">Tolerance subtracted from and added to <paramref name="theoMz"/> to form the window.</param>
/// <returns>
/// Matching indices: first those found walking downwards from the index returned by
/// <c>BinarySearchMz</c>, then those found walking upwards from the next index.
/// Nothing is yielded when that index is not below <c>Peaks.Count</c>.
/// </returns>
public IEnumerable<int> GetIndexOfMZInRange(double theoMz, MassTolerance productMassTolerance)
{
    double lowestMz = theoMz - productMassTolerance;
    double highestMz = theoMz + productMassTolerance;

    int pivot = BinarySearchMz(theoMz);
    if (pivot >= Peaks.Count)
    {
        yield break;
    }

    // Downward walk: stops at the first peak lighter than the window.
    int below = pivot;
    while (below >= 0 && Peaks[below].MZ >= lowestMz)
    {
        if (Peaks[below].MZ <= highestMz)
        {
            yield return below;
        }
        below--;
    }

    // Upward walk: stops at the first peak heavier than the window.
    int above = pivot + 1;
    while (above < Peaks.Count && Peaks[above].MZ <= highestMz)
    {
        if (Peaks[above].MZ >= lowestMz)
        {
            yield return above;
        }
        above++;
    }
}
/// <summary>
/// Streams the digested proteome: for each peptide produced by <c>ProteinSearcher.ProteinDigest</c>,
/// every variably modified form is paired with the index that <c>AllQueries.BinarySearch</c> returns
/// for the floor of its precursor mass window.
/// </summary>
/// <param name="proteins">Proteins to digest.</param>
/// <param name="allowSNP">Passed through to <c>ProteinSearcher.ProteinDigest</c>.</param>
/// <param name="AllQueries">Queries in which the first candidate index is looked up.</param>
/// <returns>
/// A lazily evaluated sequence of (modified peptide, first query index) pairs; pairs whose index is
/// negative or not below <c>AllQueries.Count</c> are not produced.
/// </returns>
public IEnumerable<Tuple<Peptide, int>> DigestProteomeOnTheFly(List<Protein> proteins, bool allowSNP, Queries AllQueries)
{
    // Peptides are produced on the fly instead of being stored in a dictionary:
    // an earlier note in this method recorded that the dictionary did not fit in memory (360 Go).
    foreach (Peptide basePeptide in ProteinSearcher.ProteinDigest(options, proteins, allowSNP))
    {
        var isoforms = basePeptide.GetVariablyModifiedPeptides(options.variableModifications, options.maximumVariableModificationIsoforms);
        foreach (Peptide isoform in isoforms)
        {
            isoform.SetFixedModifications(options.fixedModifications);

            int firstIndex = AllQueries.BinarySearch(MassTolerance.MzFloor(isoform.MonoisotopicMass, options.precursorMassTolerance));
            bool indexIsInsideQueries = firstIndex >= 0 && firstIndex < AllQueries.Count;
            if (indexIsInsideQueries)
            {
                yield return new Tuple<Peptide, int>(isoform, firstIndex);
            }
        }
        //TODO check if this favors targets over decoys since proteins are sorted target->decoy
    }
}
/// <summary>
/// Computes the lower edge of the tolerance window around <paramref name="left"/>.
/// </summary>
/// <param name="left">Value at the centre of the window.</param>
/// <param name="right">
/// Tolerance. With <c>MassToleranceUnits.Da</c> its <c>Value</c> is subtracted as-is;
/// with any other unit the <c>Value</c> is treated as a fraction of <paramref name="left"/> scaled by 1e-6.
/// </param>
/// <returns><paramref name="left"/> lowered by the tolerance.</returns>
public static double MzFloor(double left, MassTolerance right)
{
    bool isAbsoluteTolerance = right.Units == MassToleranceUnits.Da;
    return isAbsoluteTolerance
        ? left - right.Value
        : left - left * right.Value / 1e6;
}
/// <summary>
/// Yields the queries whose precursor mass lies within <paramref name="precursorMassTolerance"/>
/// of <paramref name="precursorMass"/>.
/// </summary>
/// <param name="precursorMass">Mass at the centre of the window.</param>
/// <param name="precursorMassTolerance">Tolerance subtracted from and added to <paramref name="precursorMass"/>.</param>
/// <returns>
/// Consecutive queries starting at the index returned by <c>BinarySearch</c> for the lower bound,
/// up to the last one whose precursor mass does not exceed the upper bound. Nothing is yielded when
/// that index is out of range or its query is lighter than the lower bound.
/// </returns>
public IEnumerable<Query> GetQueryInMassRange(double precursorMass, MassTolerance precursorMassTolerance)
{
    double lowerMass = precursorMass - precursorMassTolerance;
    double upperMass = precursorMass + precursorMassTolerance;

    int start = BinarySearch(lowerMass);
    bool startIsUsable = start >= 0 && start < Count && this[start].precursor.Mass >= lowerMass;
    if (!startIsUsable)
    {
        yield break;
    }

    int cursor = start;
    while (cursor < Count && this[cursor].precursor.Mass <= upperMass)
    {
        yield return this[cursor];
        cursor++;
    }
}
/// <summary>
/// Creates the options for a search against the given FASTA database, with every setting
/// initialised to its default value.
/// </summary>
/// <param name="fasta">Stored in <c>FastaDatabaseFilepath</c>.</param>
/// <param name="console">Console used for output; when <c>null</c>, a new <c>ConSolCommandLine</c> is created.</param>
public DBOptions(string fasta, IConSol console = null)
{
    if (console != null)
    {
        ConSole = console;
    }
    else
    {
        ConSole = new ConSolCommandLine();
    }

    // --- Database and digestion defaults ---
    DecoyFusion = true;
    FastaDatabaseFilepath = fasta;
    MaximumPeptideMass = 10000;
    // Alternative enzyme noted by the original author: "trypsin (no proline rule)".
    DigestionEnzyme = ProteaseDictionary.Instance["no enzyme"];
    NoEnzymeSearch = true;
    // Determines the length of peptides with the no-enzyme option (3 was noted as an alternative).
    ToleratedMissedCleavages = 100;
    initiatorMethionineBehavior = InitiatorMethionineBehavior.Variable;

    // --- Modification defaults ---
    fixedModifications = new GraphML_List<Modification>();
    variableModifications = new GraphML_List<Modification>();
    maximumVariableModificationIsoforms = 1024;

    // --- Spectrum defaults ---
    MinimumPrecursorChargeState = 1;
    MaximumPrecursorChargeState = 4;
    MaximumNumberOfFragmentsPerSpectrum = 400;

    //TODO Add precision to the precursor by reading MS part of file
    precursorMassTolerance = new MassTolerance(0.005, MassToleranceUnits.Da); // 2.1 was noted as an alternative

    //TODO Add precision to the product masses by reading corresponding MS part of raw file
    productMassTolerance = new MassTolerance(0.005, MassToleranceUnits.Da);

    // --- Scoring and output defaults ---
    PSMFalseDiscoveryRate = 0.25; // 0.05 was noted as an alternative
    OutputFolder = @"C:\_IRIC\DATA\Test2";
    MinimumPSMScore = 0.0001;
}
/// <summary>
/// Maps all peptide sequences to potential precursors and spectrum
/// </summary>
/// <param name="queries">
/// Queries to score. For each hit, queries are visited from the hit's index onwards for as long as
/// their precursor mass stays within the precursor mass tolerance window of the peptide.
/// </param>
/// <param name="fittingPeptides">
/// Pairs of (peptide, index of the first candidate query). The sequence is consumed in parallel.
/// </param>
/// <returns>
/// <c>queries.Precursors</c>, after the PSMs of every query have been appended to
/// <c>psms_AllPossibilities</c> of its precursor.
/// </returns>
public Precursors Search(Queries queries, IEnumerable<Tuple<Peptide, int>> fittingPeptides)
{
    queries.dbOptions.ConSole.WriteLine("Mapping " + queries.Count + " queries to the digested proteome ... ");

    // Shared across the parallel workers below; only ever updated through Interlocked.
    long nbQueryConsidered = 0;

    Parallel.ForEach<Tuple<Peptide, int>>(fittingPeptides, (Tuple<Peptide, int> hit) =>
    {
        int indexPrecursor = hit.Item2;
        double maximumMass = MassTolerance.MzTop(hit.Item1.MonoisotopicMass, options.precursorMassTolerance);
        double minimumMass = MassTolerance.MzFloor(hit.Item1.MonoisotopicMass, options.precursorMassTolerance);

        if (indexPrecursor < queries.Count && queries[indexPrecursor].precursor.Mass >= minimumMass)
        {
            while (indexPrecursor < queries.Count && queries[indexPrecursor].precursor.Mass <= maximumMass)
            {
                // Scoring for one query is serialised on that query's PSM list.
                lock (queries[indexPrecursor].psms)
                {
                    //Target (or decoy with enzyme digests)
                    ComputePSMs(queries[indexPrecursor], hit.Item1);

                    //Decoy if NoEnzyme digest
                    if (options.DecoyFusion)
                    {
                        ComputePSMs(queries[indexPrecursor], hit.Item1.Reverse());
                    }
                }
                indexPrecursor++;
            }

            // The counter is written by several workers at once: a plain "+=" on a long is a
            // non-atomic read-modify-write and loses updates, so add atomically instead.
            System.Threading.Interlocked.Add(ref nbQueryConsidered, indexPrecursor - hit.Item2);
        }
        else
        {
            // The first candidate query does not fall inside this peptide's mass window.
            options.ConSole.WriteLine("WTF####");
        }
    });

    //Push PSMs to Precursor. Plain append: entries sharing a peptide are NOT merged
    //(a merge pass existed here as disabled code in the original).
    foreach (Query query in queries)
    {
        query.precursor.psms_AllPossibilities.AddRange(query.psms);
    }

    int nbAssignedPrecursor = 0;
    foreach (Precursor precursor in queries.Precursors)
    {
        if (precursor.psms_AllPossibilities.Count > 0)
        {
            nbAssignedPrecursor++;
        }
    }

    //TODO Check the impact of this approach!!!!
    //(an FDR-based pruning of the precursors' PSMs was sketched at this point but left disabled)

    int nbAssignedQuery = 0;
    foreach (Query query in queries)
    {
        if (query.psms.Count > 0)
        {
            nbAssignedQuery++;
        }
    }

    options.ConSole.WriteLine(nbAssignedQuery + " queries matched [" + nbAssignedPrecursor + " precursors] out of " + nbQueryConsidered + " psm computed");
    return queries.Precursors;
}
/// <summary>
/// Folds the closest +1 isotope peak of every charged peak into that peak and returns the reduced list.
/// </summary>
/// <param name="peaks">Input peaks. Sorted in place by ascending m/z as a side effect.</param>
/// <param name="maxCharge">Not used by this method.</param>
/// <param name="isotopicMzTolerance">
/// Tolerance for matching an isotope peak: its <c>Value</c> bounds both the scan window and the
/// accepted mass error, and its <c>Units</c> are passed to <c>Numerics.CalculateMassError</c>.
/// </param>
/// <returns>
/// A new list in which, for each peak with a positive charge, the best-matching less intense peak at the
/// expected +1 isotope position has been removed and its intensity added to the lighter peak.
/// </returns>
private static GraphML_List<MsMsPeak> Deisotopebkp(GraphML_List<MsMsPeak> peaks, int maxCharge, MassTolerance isotopicMzTolerance)
{
    // Sort BEFORE taking the working copy. The scan below stops at the first peak beyond the
    // isotope window, which is only valid on an ascending-m/z list; the original copied first
    // and sorted only the input, leaving the working copy in its incoming order.
    peaks.Sort(MsMsPeak.AscendingMzComparison);
    GraphML_List<MsMsPeak> new_peaks = new GraphML_List<MsMsPeak>(peaks);

    for (int lowMassIndex = 0; lowMassIndex < new_peaks.Count - 1; lowMassIndex++)
    {
        // Peaks without an assigned charge have no defined isotope spacing.
        if (new_peaks[lowMassIndex].Charge <= 0)
        {
            continue;
        }

        int toRemove = -1; // -1 = no isotope peak found
        double bestMassError = isotopicMzTolerance.Value;
        double aim = Numerics.IsotopicMassShift(1, new_peaks[lowMassIndex].Charge) + new_peaks[lowMassIndex].MZ;

        // NOTE(review): the window adds the tolerance value directly to an m/z while the error is
        // computed in the tolerance's units; confirm this is intended for non-Da tolerances.
        int potentialIsotopeIndex = lowMassIndex + 1;
        while (potentialIsotopeIndex < new_peaks.Count && new_peaks[potentialIsotopeIndex].MZ < aim + bestMassError)
        {
            // Only a less intense peak is accepted as the isotope of the current peak.
            if (new_peaks[lowMassIndex].Intensity > new_peaks[potentialIsotopeIndex].Intensity)
            {
                double massError = Math.Abs(Numerics.CalculateMassError(new_peaks[potentialIsotopeIndex].MZ, aim, isotopicMzTolerance.Units));
                if (massError < bestMassError)
                {
                    bestMassError = massError;
                    toRemove = potentialIsotopeIndex;
                }
            }
            potentialIsotopeIndex++;
        }

        if (toRemove >= 0)
        {
            new_peaks[lowMassIndex].Intensity += new_peaks[toRemove].Intensity;
            new_peaks.RemoveAt(toRemove);
        }
    }
    return new_peaks;
}
/// <summary>
/// For every peak except the last, tries each charge from <paramref name="maxCharge"/> down to 1,
/// looks for up to four isotope peaks at the spacing given by <c>Numerics.IsotopicMassShift</c>,
/// keeps the best-scoring charge, and folds the matched isotope peaks into the peak.
/// </summary>
/// <param name="peaks">Input peaks; copied into a new list before processing. The scan stops at the first
/// peak beyond each isotope window, so it presumably expects ascending m/z order (confirm with callers).</param>
/// <param name="maxCharge">Highest charge state tried.</param>
/// <param name="isotopicMzTolerance">
/// Tolerance for isotope matching: its <c>Value</c> bounds the scan window and the accepted error,
/// and its <c>Units</c> are passed to <c>Numerics.CalculateMassError</c>.
/// </param>
/// <returns>
/// The new list, with each processed peak's <c>Charge</c> set to the winning charge (0 when no charge
/// scored above zero) and the matched isotope peaks removed after their intensity was added to it.
/// </returns>
private static GraphML_List<MsMsPeak> AssignChargeStatesAndDeisotope(GraphML_List<MsMsPeak> peaks, int maxCharge, MassTolerance isotopicMzTolerance)
{
    const int isotopeSlots = 4;

    GraphML_List<MsMsPeak> result = new GraphML_List<MsMsPeak>(peaks);
    int[] winners = new int[isotopeSlots]; // isotope indices of the best charge so far (0 = none)
    int[] trial = new int[isotopeSlots];   // isotope indices for the charge being tried (0 = none)

    for (int baseIdx = 0; baseIdx < result.Count - 1; baseIdx++)
    {
        double topScore = 0;
        int topCharge = 0;
        Array.Clear(winners, 0, isotopeSlots);

        for (int z = maxCharge; z > 0; z--)
        {
            Array.Clear(trial, 0, isotopeSlots);
            double score = 0;

            // The scan position is shared by all isotopes of one charge: it only moves forward.
            int scan = baseIdx + 1;

            for (int k = 1; k <= isotopeSlots; k++)
            {
                double smallestError = isotopicMzTolerance.Value;
                double target = Numerics.IsotopicMassShift(k, z) + result[baseIdx].MZ;

                for (; scan < result.Count && result[scan].MZ < target + smallestError; scan++)
                {
                    // Only a less intense peak is accepted as an isotope of the base peak.
                    if (result[baseIdx].Intensity > result[scan].Intensity)
                    {
                        double error = Math.Abs(Numerics.CalculateMassError(result[scan].MZ, target, isotopicMzTolerance.Units));
                        if (error < smallestError)
                        {
                            smallestError = error;
                            trial[k - 1] = scan;
                        }
                    }
                }

                // A closer match contributes more; an unmatched isotope contributes exactly zero.
                score += isotopicMzTolerance.Value - smallestError;
                if (score == 0)
                {
                    // Nothing matched so far for this charge: give up on it.
                    break;
                }
            }

            if (score > topScore)
            {
                Array.Copy(trial, winners, isotopeSlots);
                topScore = score;
                topCharge = z;
            }
        }

        result[baseIdx].Charge = topCharge;

        // Remove from the highest isotope slot down so earlier removals do not shift later indices.
        for (int slot = isotopeSlots - 1; slot >= 0; slot--)
        {
            int absorbed = winners[slot];
            if (absorbed > 0)
            {
                result[baseIdx].Intensity += result[absorbed].Intensity;
                result.RemoveAt(absorbed);
            }
        }
    }
    return result;
}
/// <summary>
/// Builds a new peak list in which each input peak is emitted once per (following peak, charge)
/// combination that matches the C12/C13 spacing, or once with charge 0 when nothing matches.
/// </summary>
/// <param name="peaks">Input peaks; not modified. The inner scan stops at the first peak beyond the
/// isotope window, so it presumably expects ascending m/z order (confirm with callers).</param>
/// <param name="maxCharge">Highest charge state tested; charges are tried from this value down to 1.</param>
/// <param name="isotopicMzTolerance">
/// Tolerance for the spacing match: it widens the scan window, its <c>Value</c> bounds the accepted
/// error, and its <c>Units</c> are passed to <c>Numerics.CalculateMassError</c>.
/// </param>
/// <returns>
/// A new list holding at least one entry per input peak, including the last peak, which has no
/// following peak to match and therefore always gets charge 0.
/// </returns>
private static GraphML_List<MsMsPeak> AssignChargeStatesbkp(GraphML_List<MsMsPeak> peaks, int maxCharge, MassTolerance isotopicMzTolerance)
{
    GraphML_List<MsMsPeak> new_peaks = new GraphML_List<MsMsPeak>();

    // Iterate over EVERY peak. The original stopped at Count - 1, which silently dropped the
    // last peak from the result because the output list is built from scratch.
    for (int i = 0; i < peaks.Count; i++)
    {
        bool chargeAssigned = false;

        for (int j = i + 1; j < peaks.Count; j++)
        {
            // Past the widest possible isotope spacing (charge 1) plus tolerance: stop scanning.
            if (peaks[j].MZ > (peaks[i].MZ + Constants.C12_C13_MASS_DIFFERENCE) + isotopicMzTolerance)
            {
                break;
            }

            for (int c = maxCharge; c >= 1; c--)
            {
                double expectedMz = peaks[i].MZ + Constants.C12_C13_MASS_DIFFERENCE / (double)c;
                if (Math.Abs(Numerics.CalculateMassError(peaks[j].MZ, expectedMz, isotopicMzTolerance.Units)) <= isotopicMzTolerance.Value)
                {
                    new_peaks.Add(new MsMsPeak(peaks[i].MZ, peaks[i].Intensity, c));
                    chargeAssigned = true;
                }
            }
        }

        if (!chargeAssigned)
        {
            new_peaks.Add(new MsMsPeak(peaks[i].MZ, peaks[i].Intensity, 0));
        }
    }
    return new_peaks;
}