/// <summary>
        /// Lazily digests <paramref name="proteins"/> and yields every modified peptide isoform
        /// together with the query index at which matching should start.
        /// </summary>
        /// <param name="proteins">Proteins handed to <c>ProteinSearcher.ProteinDigest</c>.</param>
        /// <param name="allowSNP">Forwarded unchanged to <c>ProteinSearcher.ProteinDigest</c>.</param>
        /// <param name="AllQueries">Query collection searched with <c>BinarySearch</c>.</param>
        /// <returns>
        /// One (peptide, index) pair per isoform whose search index lies in [0, AllQueries.Count).
        /// </returns>
        public IEnumerable <Tuple <Peptide, int> > DigestProteomeOnTheFlyFast(List <Protein> proteins, bool allowSNP, Queries AllQueries)
        {
            foreach (Peptide basePeptide in ProteinSearcher.ProteinDigest(options, proteins, allowSNP))
            {
                foreach (Peptide isoform in basePeptide.GetVariablyModifiedPeptides(options.variableModifications, options.maximumVariableModificationIsoforms))
                {
                    isoform.SetFixedModifications(options.fixedModifications);

                    // The search key is the lower edge of the precursor tolerance window.
                    double lowestMass = MassTolerance.MzFloor(isoform.MonoisotopicMass, options.precursorMassTolerance);
                    int    startIndex = AllQueries.BinarySearch(lowestMass);

                    // Isoforms whose index falls outside the query list are not reported.
                    if (startIndex < 0 || startIndex >= AllQueries.Count)
                    {
                        continue;
                    }

                    yield return new Tuple <Peptide, int>(isoform, startIndex);
                }

                // TODO: if de-duplication by base sequence is ever reinstated here, check whether it
                // favors targets over decoys, since proteins are sorted target -> decoy.
            }
        }
        /*
         * public IEnumerable<int> GetIndexOfMassesInRange(double theoMass, MassTolerance productMassTolerance)
         * {
         *  double minimum_precursor_mass = theoMass - productMassTolerance;
         *  double maximum_precursor_mass = theoMass + productMassTolerance;
         *  int mid_index = BinarySearch(theoMass);
         *  if (mid_index < Peaks.Count)//.Length)
         *  {
         *      for (int i = mid_index; i >= 0 && Peaks[i].Mass >= minimum_precursor_mass; i--)
         *      {
         *          if (Peaks[i].Mass <= maximum_precursor_mass)
         *              yield return i;
         *      }
         *
         *      for (int i = mid_index + 1; i < Peaks.Count && Peaks[i].Mass <= maximum_precursor_mass; i++)
         *      {
         *          if (Peaks[i].Mass >= minimum_precursor_mass)
         *              yield return i;
         *      }
         *  }
         * }//*/
        //TODO
        /// <summary>
        /// Enumerates the indices of the entries in <c>Peaks</c> whose MZ lies within the tolerance
        /// window around <paramref name="theoMz"/>.
        /// </summary>
        /// <param name="theoMz">Center of the window; also the key passed to <c>BinarySearchMz</c>.</param>
        /// <param name="productMassTolerance">Tolerance subtracted from / added to the center.</param>
        /// <returns>
        /// Matching indices: first from the search position downwards, then from the position
        /// after it upwards. Nothing is yielded when the search position is at or beyond <c>Peaks.Count</c>.
        /// </returns>
        public IEnumerable <int> GetIndexOfMZInRange(double theoMz, MassTolerance productMassTolerance)
        {
            double lowerMz = theoMz - productMassTolerance;
            double upperMz = theoMz + productMassTolerance;
            int    pivot   = BinarySearchMz(theoMz);

            if (pivot >= Peaks.Count)
            {
                yield break;
            }

            // Walk towards smaller indices, starting at the search position itself.
            int below = pivot;
            while (below >= 0 && Peaks[below].MZ >= lowerMz)
            {
                if (Peaks[below].MZ <= upperMz)
                {
                    yield return below;
                }
                below--;
            }

            // Walk towards larger indices, starting right after the search position.
            int above = pivot + 1;
            while (above < Peaks.Count && Peaks[above].MZ <= upperMz)
            {
                if (Peaks[above].MZ >= lowerMz)
                {
                    yield return above;
                }
                above++;
            }
        }
        /// <summary>
        /// Streams the digested proteome: each peptide produced by <c>ProteinSearcher.ProteinDigest</c>
        /// is expanded into its variably modified forms, fixed modifications are applied, and the
        /// form is paired with the index returned by <c>AllQueries.BinarySearch</c> for the lower
        /// bound of its precursor mass window.
        /// </summary>
        /// <param name="proteins">Proteins to digest.</param>
        /// <param name="allowSNP">Passed through to the digestion routine.</param>
        /// <param name="AllQueries">Queries in which the starting index is looked up.</param>
        /// <returns>Pairs (modified peptide, first query index) for indices inside [0, AllQueries.Count).</returns>
        /// <remarks>
        /// Results are yielded one at a time; an earlier attempt to hold all peptide sequences in a
        /// dictionary was abandoned because it did not fit in memory (see history of this method).
        /// </remarks>
        public IEnumerable <Tuple <Peptide, int> > DigestProteomeOnTheFly(List <Protein> proteins, bool allowSNP, Queries AllQueries)
        {
            foreach (Peptide digested in ProteinSearcher.ProteinDigest(options, proteins, allowSNP))
            {
                IEnumerable <Peptide> modifiedForms = digested.GetVariablyModifiedPeptides(options.variableModifications, options.maximumVariableModificationIsoforms);
                foreach (Peptide candidate in modifiedForms)
                {
                    candidate.SetFixedModifications(options.fixedModifications);

                    double windowFloor = MassTolerance.MzFloor(candidate.MonoisotopicMass, options.precursorMassTolerance);
                    int    queryIndex  = AllQueries.BinarySearch(windowFloor);

                    bool indexIsValid = queryIndex >= 0 && queryIndex < AllQueries.Count;
                    if (!indexIsValid)
                    {
                        continue;
                    }

                    yield return new Tuple <Peptide, int>(candidate, queryIndex);
                }

                // TODO: should peptides be de-duplicated by base sequence again, verify that doing so
                // does not favor targets over decoys (proteins are sorted target -> decoy).
            }
        }
 /// <summary>
 /// Returns the lower bound obtained by subtracting the tolerance <paramref name="right"/>
 /// from the value <paramref name="left"/>.
 /// </summary>
 /// <param name="left">Value the tolerance is applied to.</param>
 /// <param name="right">Tolerance; its <c>Units</c> decide how <c>Value</c> is interpreted.</param>
 /// <returns>
 /// <c>left - right.Value</c> when the units are <c>MassToleranceUnits.Da</c>; otherwise
 /// <c>left</c> reduced by <c>right.Value</c> millionths of itself.
 /// </returns>
 public static double MzFloor(double left, MassTolerance right)
 {
     bool isAbsolute = right.Units == MassToleranceUnits.Da;

     // Absolute tolerances are subtracted as-is; anything else is scaled by left / 1e6.
     return isAbsolute
            ? left - right.Value
            : left - left * right.Value / 1e6;
 }
Beispiel #5
0
        /// <summary>
        /// Enumerates the queries whose precursor mass lies inside the tolerance window around
        /// <paramref name="precursorMass"/>.
        /// </summary>
        /// <param name="precursorMass">Center of the mass window.</param>
        /// <param name="precursorMassTolerance">Tolerance subtracted from / added to the center.</param>
        /// <returns>
        /// Consecutive queries starting at the index returned by <c>BinarySearch</c> for the lower
        /// bound, for as long as their precursor mass does not exceed the upper bound. Empty when
        /// that index is out of range or points at a query lighter than the lower bound.
        /// </returns>
        public IEnumerable <Query> GetQueryInMassRange(double precursorMass, MassTolerance precursorMassTolerance)
        {
            double lowerBound = precursorMass - precursorMassTolerance;
            double upperBound = precursorMass + precursorMassTolerance;
            int    start      = BinarySearch(lowerBound);

            bool startIsUsable = start >= 0 && start < Count && this[start].precursor.Mass >= lowerBound;
            if (!startIsUsable)
            {
                yield break;
            }

            int cursor = start;
            while (cursor < Count && this[cursor].precursor.Mass <= upperBound)
            {
                yield return this[cursor];
                cursor++;
            }
        }
Beispiel #6
0
        /// <summary>
        /// Creates a set of database search options populated with default values.
        /// </summary>
        /// <param name="fasta">Stored as <c>FastaDatabaseFilepath</c>.</param>
        /// <param name="console">
        /// Console used for output; when null a new <c>ConSolCommandLine</c> is created.
        /// </param>
        public DBOptions(string fasta, IConSol console = null)
        {
            ConSole = console ?? new ConSolCommandLine();

            // General defaults.
            DecoyFusion           = true;
            FastaDatabaseFilepath = fasta;
            MaximumPeptideMass    = 10000;

            // Digestion defaults: no-enzyme search (alternative considered: "trypsin (no proline rule)").
            DigestionEnzyme = ProteaseDictionary.Instance["no enzyme"];
            NoEnzymeSearch  = true;
            // Determines the length of peptides with the no-enzyme option (3 was the earlier value).
            ToleratedMissedCleavages    = 100;
            initiatorMethionineBehavior = InitiatorMethionineBehavior.Variable;

            // Modification defaults: none configured, isoform expansion capped.
            fixedModifications                  = new GraphML_List <Modification>();
            variableModifications               = new GraphML_List <Modification>();
            maximumVariableModificationIsoforms = 1024;

            // Spectrum defaults.
            MinimumPrecursorChargeState         = 1;
            MaximumPrecursorChargeState         = 4;
            MaximumNumberOfFragmentsPerSpectrum = 400;

            // TODO: add precision to the precursor by reading the MS part of the file (2.1 was the earlier value).
            precursorMassTolerance = new MassTolerance(0.005, MassToleranceUnits.Da);
            // TODO: add precision to the product masses by reading the corresponding MS part of the raw file.
            productMassTolerance = new MassTolerance(0.005, MassToleranceUnits.Da);

            // Scoring / reporting defaults (0.05 was the earlier FDR value).
            PSMFalseDiscoveryRate = 0.25;
            OutputFolder          = @"C:\_IRIC\DATA\Test2";
            MinimumPSMScore       = 0.0001;
        }
Beispiel #7
0
        /// <summary>
        /// Maps all candidate peptides to the queries whose precursor mass falls inside the
        /// precursor mass tolerance window, computes the corresponding PSMs, and pushes the PSMs
        /// of every query to its precursor.
        /// </summary>
        /// <param name="queries">Queries to match; indexed by the integers carried in <paramref name="fittingPeptides"/>.</param>
        /// <param name="fittingPeptides">
        /// Candidate peptides (Item1) paired with the index of the first query to test (Item2).
        /// The sequence is consumed in parallel.
        /// </param>
        /// <returns><c>queries.Precursors</c>, after the PSMs of each query were appended to its precursor.</returns>
        public Precursors Search(Queries queries, IEnumerable <Tuple <Peptide, int> > fittingPeptides)
        {
            queries.dbOptions.ConSole.WriteLine("Mapping " + queries.Count + " queries to the digested proteome ... ");

            // Updated from several worker threads; must only be modified through Interlocked.
            long nbQueryConsidered = 0;

            Parallel.ForEach <Tuple <Peptide, int> >(fittingPeptides, (Tuple <Peptide, int> hit) =>
            {
                int indexPrecursor = hit.Item2;
                double maximumMass = MassTolerance.MzTop(hit.Item1.MonoisotopicMass, options.precursorMassTolerance);
                double minimumMass = MassTolerance.MzFloor(hit.Item1.MonoisotopicMass, options.precursorMassTolerance);

                if (indexPrecursor < queries.Count && queries[indexPrecursor].precursor.Mass >= minimumMass)
                {
                    // Walk forward through the queries until the precursor mass leaves the window.
                    while (indexPrecursor < queries.Count && queries[indexPrecursor].precursor.Mass <= maximumMass)
                    {
                        // The PSM list of a query is shared between workers; serialize access per query.
                        lock (queries[indexPrecursor].psms)
                        {
                            // Target (or decoy with enzyme digests)
                            ComputePSMs(queries[indexPrecursor], hit.Item1);

                            // Decoy if NoEnzyme digest
                            if (options.DecoyFusion)
                            {
                                ComputePSMs(queries[indexPrecursor], hit.Item1.Reverse());
                            }
                        }

                        indexPrecursor++;
                    }

                    // A plain "+=" on the captured counter is a data race under Parallel.ForEach
                    // and silently loses updates; add atomically instead.
                    System.Threading.Interlocked.Add(ref nbQueryConsidered, indexPrecursor - hit.Item2);
                }
                else
                {
                    // The supplied start index is past the end of the queries, or points at a
                    // query lighter than the lower edge of the window.
                    options.ConSole.WriteLine("WTF####");
                }
            });

            // Push PSMs to their precursor. Entries are appended as-is; PSMs sharing a peptide are not merged.
            foreach (Query query in queries)
            {
                query.precursor.psms_AllPossibilities.AddRange(query.psms);
            }

            int nbAssignedPrecursor = 0;

            foreach (Precursor precursor in queries.Precursors)
            {
                if (precursor.psms_AllPossibilities.Count > 0)
                {
                    nbAssignedPrecursor++;
                }
            }

            // TODO Check the impact of this approach!!!! (an FDR-based pre-filter of the PSMs kept per
            // precursor was tried here and is currently disabled)

            int nbAssignedQuery = 0;

            foreach (Query query in queries)
            {
                if (query.psms.Count > 0)
                {
                    nbAssignedQuery++;
                }
            }
            options.ConSole.WriteLine(nbAssignedQuery + " queries matched [" + nbAssignedPrecursor + " precursors] out of " + nbQueryConsidered + " psm computed");

            return queries.Precursors;
        }
Beispiel #8
0
        /// <summary>
        /// Folds the +1 isotope peak of every charged peak into that peak: the closest less intense
        /// peak found near the expected isotope m/z is removed and its intensity added to the
        /// lower-mass peak.
        /// </summary>
        /// <param name="peaks">Input peaks. Sorted in place by ascending m/z (side effect kept from the original).</param>
        /// <param name="maxCharge">Not used by this routine; kept for signature compatibility.</param>
        /// <param name="isotopicMzTolerance">Tolerance used both as search window and as the initial best error.</param>
        /// <returns>A new list, in ascending m/z order, without the peaks that were merged away.</returns>
        private static GraphML_List <MsMsPeak> Deisotopebkp(GraphML_List <MsMsPeak> peaks, int maxCharge, MassTolerance isotopicMzTolerance)
        {
            // Sort BEFORE taking the working copy. The forward scan below stops at the first peak
            // beyond the isotope window, which is only valid on m/z-ordered data; previously the
            // copy was taken first and therefore stayed in the caller's (possibly unsorted) order.
            peaks.Sort(MsMsPeak.AscendingMzComparison);

            GraphML_List <MsMsPeak> new_peaks = new GraphML_List <MsMsPeak>(peaks);

            for (int lowMassIndex = 0; lowMassIndex < new_peaks.Count - 1; lowMassIndex++)
            {
                // Only peaks with an assigned charge have a predictable isotope spacing.
                if (new_peaks[lowMassIndex].Charge <= 0)
                {
                    continue;
                }

                int    toRemove      = -1;
                double bestMassError = isotopicMzTolerance.Value;
                double aim           = Numerics.IsotopicMassShift(1, new_peaks[lowMassIndex].Charge) + new_peaks[lowMassIndex].MZ;

                // The window shrinks as better candidates are found (bestMassError decreases).
                for (int potentialIsotopeIndex = lowMassIndex + 1;
                     potentialIsotopeIndex < new_peaks.Count && new_peaks[potentialIsotopeIndex].MZ < aim + bestMassError;
                     potentialIsotopeIndex++)
                {
                    // An isotope peak is only accepted when it is less intense than the base peak.
                    if (new_peaks[lowMassIndex].Intensity > new_peaks[potentialIsotopeIndex].Intensity)
                    {
                        double massError = Math.Abs(Numerics.CalculateMassError(new_peaks[potentialIsotopeIndex].MZ, aim, isotopicMzTolerance.Units));
                        if (massError < bestMassError)
                        {
                            bestMassError = massError;
                            toRemove      = potentialIsotopeIndex;
                        }
                    }
                }

                if (toRemove >= 0)
                {
                    new_peaks[lowMassIndex].Intensity += new_peaks[toRemove].Intensity;
                    new_peaks.RemoveAt(toRemove);
                }
            }
            return new_peaks;
        }
Beispiel #9
0
        /// <summary>
        /// For every peak (except the last), tries each charge from <paramref name="maxCharge"/>
        /// down to 1, looks for up to four less intense isotope peaks at the expected m/z shifts,
        /// keeps the best scoring charge, assigns it to the peak and merges the isotope peaks found
        /// for that charge into it.
        /// </summary>
        /// <param name="peaks">Input peaks; copied into a new list which is the one modified.</param>
        /// <param name="maxCharge">Highest charge state tested.</param>
        /// <param name="isotopicMzTolerance">Tolerance used as search window and as score baseline.</param>
        /// <returns>The working copy, with charges assigned and merged isotope peaks removed.</returns>
        private static GraphML_List <MsMsPeak> AssignChargeStatesAndDeisotope(GraphML_List <MsMsPeak> peaks, int maxCharge, MassTolerance isotopicMzTolerance)
        {
            const int isotopeCount = 4;

            // NOTE(review): the scan below presumably expects ascending m/z order; no sort is done here.
            GraphML_List <MsMsPeak> result = new GraphML_List <MsMsPeak>(peaks);

            // Index 0 doubles as "no isotope found": a real isotope index is always > baseIndex >= 0.
            int[] winners    = new int[isotopeCount];
            int[] candidates = new int[isotopeCount];

            for (int baseIndex = 0; baseIndex < result.Count - 1; baseIndex++)
            {
                double topScore  = 0;
                int    topCharge = 0;
                Array.Clear(winners, 0, isotopeCount);

                for (int charge = maxCharge; charge > 0; charge--)
                {
                    Array.Clear(candidates, 0, isotopeCount);
                    double score = 0;

                    // The cursor is shared by all isotopes of one charge: each isotope resumes
                    // where the previous one stopped.
                    int cursor = baseIndex + 1;
                    for (int isotope = 1; isotope <= isotopeCount; isotope++)
                    {
                        double smallestError = isotopicMzTolerance.Value;
                        double aim           = Numerics.IsotopicMassShift(isotope, charge) + result[baseIndex].MZ;

                        for (; cursor < result.Count && result[cursor].MZ < aim + smallestError; cursor++)
                        {
                            // Only peaks less intense than the base peak qualify as isotopes.
                            if (result[baseIndex].Intensity > result[cursor].Intensity)
                            {
                                double error = Math.Abs(Numerics.CalculateMassError(result[cursor].MZ, aim, isotopicMzTolerance.Units));
                                if (error < smallestError)
                                {
                                    smallestError           = error;
                                    candidates[isotope - 1] = cursor;
                                }
                            }
                        }

                        // An isotope that was not found contributes exactly zero.
                        score += isotopicMzTolerance.Value - smallestError;
                        if (score == 0)
                        {
                            break;
                        }
                    }

                    if (score > topScore)
                    {
                        Array.Copy(candidates, winners, isotopeCount);
                        topScore  = score;
                        topCharge = charge;
                    }
                }

                result[baseIndex].Charge = topCharge;

                // Remove from the last isotope to the first so earlier indices stay valid.
                for (int slot = isotopeCount - 1; slot >= 0; slot--)
                {
                    int isotopeIndex = winners[slot];
                    if (isotopeIndex > 0)
                    {
                        result[baseIndex].Intensity += result[isotopeIndex].Intensity;
                        result.RemoveAt(isotopeIndex);
                    }
                }
            }
            return result;
        }
Beispiel #10
0
        /// <summary>
        /// Builds a new peak list in which every input peak is annotated with the charge state(s)
        /// suggested by following peaks at a C12/C13 spacing, or with charge 0 when none matches.
        /// </summary>
        /// <param name="peaks">Input peaks; not modified. NOTE(review): presumably sorted by ascending m/z, since the scan stops at the first peak beyond the window — confirm with callers.</param>
        /// <param name="maxCharge">Highest charge state tested (tested from high to low).</param>
        /// <param name="isotopicMzTolerance">Tolerance for matching the expected isotope spacing.</param>
        /// <returns>
        /// One entry per (peak, matching following peak, matching charge) combination, or a single
        /// charge-0 entry for a peak without any match. Every input peak yields at least one entry.
        /// </returns>
        private static GraphML_List <MsMsPeak> AssignChargeStatesbkp(GraphML_List <MsMsPeak> peaks, int maxCharge, MassTolerance isotopicMzTolerance)
        {
            GraphML_List <MsMsPeak> new_peaks = new GraphML_List <MsMsPeak>();

            // Iterate over ALL peaks. The previous bound (peaks.Count - 1) skipped the final peak,
            // which therefore never reached the output; it has no following peak and so is now
            // emitted with charge 0 like any other unmatched peak.
            for (int i = 0; i < peaks.Count; i++)
            {
                bool chargeAssigned = false;

                for (int j = i + 1; j < peaks.Count; j++)
                {
                    // Beyond one full C12/C13 spacing (plus tolerance) no charge >= 1 can match.
                    if (peaks[j].MZ > (peaks[i].MZ + Constants.C12_C13_MASS_DIFFERENCE) + isotopicMzTolerance)
                    {
                        break;
                    }

                    for (int c = maxCharge; c >= 1; c--)
                    {
                        double expectedMz = peaks[i].MZ + Constants.C12_C13_MASS_DIFFERENCE / (double)c;
                        if (Math.Abs(Numerics.CalculateMassError(peaks[j].MZ, expectedMz, isotopicMzTolerance.Units)) <= isotopicMzTolerance.Value)
                        {
                            new_peaks.Add(new MsMsPeak(peaks[i].MZ, peaks[i].Intensity, c));
                            chargeAssigned = true;
                        }
                    }
                }

                if (!chargeAssigned)
                {
                    new_peaks.Add(new MsMsPeak(peaks[i].MZ, peaks[i].Intensity, 0));
                }
            }

            return new_peaks;
        }