Beispiel #1
0
        private MzmlMzSpectrum CreateSpectrum(ChemicalFormula f, double lowerBound, double upperBound, int minCharge)
        {
            IsotopicDistribution isodist        = IsotopicDistribution.GetDistribution(f, 0.1, 0.001);
            MzmlMzSpectrum       notActuallyMzS = new MzmlMzSpectrum(isodist.Masses.ToArray(), isodist.Intensities.ToArray(), false);

            notActuallyMzS.ReplaceXbyApplyingFunction(b => b.X.ToMz(1));

            List <double> allMasses       = new List <double>();
            List <double> allIntensitiess = new List <double>();

            while (notActuallyMzS.FirstX > lowerBound)
            {
                foreach (var thisPeak in notActuallyMzS)
                {
                    if (thisPeak.Mz > lowerBound && thisPeak.Mz < upperBound)
                    {
                        allMasses.Add(thisPeak.Mz);
                        allIntensitiess.Add(thisPeak.Intensity);
                    }
                }
                minCharge     += 1;
                notActuallyMzS = new MzmlMzSpectrum(isodist.Masses.ToArray(), isodist.Intensities.ToArray(), false);
                notActuallyMzS.ReplaceXbyApplyingFunction(s => s.X.ToMz(minCharge));
            }

            var allMassesArray       = allMasses.ToArray();
            var allIntensitiessArray = allIntensitiess.ToArray();

            Array.Sort(allMassesArray, allIntensitiessArray);

            return(new MzmlMzSpectrum(allMassesArray, allIntensitiessArray, false));
        }
Beispiel #2
0
        private MzSpectrum CreateSpectrum(ChemicalFormula f, double lowerBound, double upperBound, int minCharge)
        {
            IsotopicDistribution isodist        = IsotopicDistribution.GetDistribution(f, 0.1, 0.001);
            MzSpectrum           notActuallyMzS = new MzSpectrum(isodist.Masses.ToArray(), isodist.Intensities.ToArray(), false);

            notActuallyMzS.ReplaceXbyApplyingFunction(b => b.Mz.ToMz(1));

            List <double> allMasses       = new List <double>();
            List <double> allIntensitiess = new List <double>();

            while (notActuallyMzS.FirstX > lowerBound)
            {
                for (int i = 0; i < notActuallyMzS.Size; i++)
                {
                    if (notActuallyMzS.XArray[i] > lowerBound && notActuallyMzS.XArray[i] < upperBound)
                    {
                        allMasses.Add(notActuallyMzS.XArray[i]);
                        allIntensitiess.Add(notActuallyMzS.YArray[i]);
                    }
                }
                minCharge     += 1;
                notActuallyMzS = new MzSpectrum(isodist.Masses.ToArray(), isodist.Intensities.ToArray(), false);
                notActuallyMzS.ReplaceXbyApplyingFunction(s => s.Mz.ToMz(minCharge));
            }

            var allMassesArray       = allMasses.ToArray();
            var allIntensitiessArray = allIntensitiess.ToArray();

            Array.Sort(allMassesArray, allIntensitiessArray);

            return(new MzSpectrum(allMassesArray, allIntensitiessArray, false));
        }
Beispiel #3
0
        private MzmlMzSpectrum CreateSpectrum(ChemicalFormula f, double lowerBound, double upperBound, int minCharge)
        {
            IsotopicDistribution isodist = IsotopicDistribution.GetDistribution(f, 0.1);

            return(new MzmlMzSpectrum(isodist.Masses.ToArray(), isodist.Intensities.ToArray(), false));
            //massSpectrum1 = massSpectrum1.FilterByNumberOfMostIntense(5);

            //var chargeToLookAt = minCharge;
            //var correctedSpectrum = massSpectrum1.NewSpectrumApplyFunctionToX(s => s.ToMz(chargeToLookAt));

            //List<double> allMasses = new List<double>();
            //List<double> allIntensitiess = new List<double>();

            //while (correctedSpectrum.FirstX > lowerBound)
            //{
            //    foreach (var thisPeak in correctedSpectrum)
            //    {
            //        if (thisPeak.Mz > lowerBound && thisPeak.Mz < upperBound)
            //        {
            //            allMasses.Add(thisPeak.Mz);
            //            allIntensitiess.Add(thisPeak.Intensity);
            //        }
            //    }
            //    chargeToLookAt += 1;
            //    correctedSpectrum = massSpectrum1.NewSpectrumApplyFunctionToX(s => s.ToMz(chargeToLookAt));
            //}

            //var allMassesArray = allMasses.ToArray();
            //var allIntensitiessArray = allIntensitiess.ToArray();

            //Array.Sort(allMassesArray, allIntensitiessArray);

            //return new MzmlMzSpectrum(allMassesArray, allIntensitiessArray, false);
        }
Beispiel #4
0
        public static void TestIsotopicDistribution()
        {
            ChemicalFormula formulaA = ChemicalFormula.ParseFormula("C2H3NO");

            var a = IsotopicDistribution.GetDistribution(formulaA);

            Assert.True(Math.Abs(formulaA.MonoisotopicMass - a.Masses.ToArray()[Array.IndexOf(a.Intensities.ToArray(), a.Intensities.Max())]) < 1e-9);
        }
        public static void TestCoIsolation()
        {
            List <DigestionMotif> motifs = new List <DigestionMotif> {
                new DigestionMotif("K", null, 1, null)
            };
            Protease protease = new Protease("CustProtease", CleavageSpecificity.Full, null, null, motifs);

            ProteaseDictionary.Dictionary.Add(protease.Name, protease);
            CommonParameters CommonParameters = new CommonParameters(scoreCutoff: 1, deconvolutionIntensityRatio: 50, digestionParams: new DigestionParams(protease.Name, minPeptideLength: 1));

            var variableModifications = new List <Modification>();
            var fixedModifications    = new List <Modification>();
            var proteinList           = new List <Protein> {
                new Protein("MNNNKNDNK", null)
            };

            var searchModes = new SinglePpmAroundZeroSearchMode(5);

            Proteomics.AminoAcidPolymer.Peptide pep1 = new Proteomics.AminoAcidPolymer.Peptide("NNNK");
            Proteomics.AminoAcidPolymer.Peptide pep2 = new Proteomics.AminoAcidPolymer.Peptide("NDNK");

            var dist1 = IsotopicDistribution.GetDistribution(pep1.GetChemicalFormula(), 0.1, 0.01);

            var dist2 = IsotopicDistribution.GetDistribution(pep2.GetChemicalFormula(), 0.1, 0.01);

            MsDataScan[] Scans          = new MsDataScan[2];
            double[]     ms1intensities = new double[] { 0.8, 0.8, 0.2, 0.02, 0.2, 0.02 };
            double[]     ms1mzs         = dist1.Masses.Concat(dist2.Masses).OrderBy(b => b).Select(b => b.ToMz(1)).ToArray();

            double selectedIonMz = ms1mzs[1];

            MzSpectrum MS1 = new MzSpectrum(ms1mzs, ms1intensities, false);

            Scans[0] = new MsDataScan(MS1, 1, 1, false, Polarity.Positive, 1.0, new MzRange(300, 2000), "first spectrum", MZAnalyzerType.Unknown, MS1.SumOfAllY, null, null, "scan=1");

            double[]   ms2intensities = new double[] { 1, 1, 1, 1, 1 };
            double[]   ms2mzs         = new double[] { 146.106.ToMz(1), 228.086.ToMz(1), 229.07.ToMz(1), 260.148.ToMz(1), 342.129.ToMz(1) };
            MzSpectrum MS2            = new MzSpectrum(ms2mzs, ms2intensities, false);
            double     isolationMZ    = selectedIonMz;

            Scans[1] = new MsDataScan(MS2, 2, 2, false, Polarity.Positive, 2.0, new MzRange(100, 1500), "second spectrum", MZAnalyzerType.Unknown, MS2.SumOfAllY, null, null, "scan=2", selectedIonMz, null, null, isolationMZ, 2.5, DissociationType.HCD, 1, null);

            var myMsDataFile = new MsDataFile(Scans, null);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters(deconvolutionIntensityRatio: 50)).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];;
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchModes, CommonParameters, new List <string>()).Run();

            // Two matches for this single scan! Corresponding to two co-isolated masses
            Assert.AreEqual(2, allPsmsArray.Length);

            Assert.IsTrue(allPsmsArray[0].Score > 1);
            Assert.AreEqual(2, allPsmsArray[0].ScanNumber);

            Assert.AreEqual("NNNK", allPsmsArray[0].BaseSequence);
            Assert.AreEqual("NDNK", allPsmsArray[1].BaseSequence);
        }
Beispiel #6
0
        public static void TestPeakSplittingRight()
        {
            string fileToWrite = "myMzml.mzML";
            string peptide     = "PEPTIDE";
            double intensity   = 1e6;

            Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, @"elements.dat"));

            // generate mzml file

            // 1 MS1 scan per peptide
            MsDataScan[] scans = new MsDataScan[10];
            double[]     intensityMultipliers = { 1, 3, 5, 10, 5, 3, 1, 1, 3, 1 };

            for (int s = 0; s < scans.Length; s++)
            {
                ChemicalFormula      cf          = new Proteomics.AminoAcidPolymer.Peptide(peptide).GetChemicalFormula();
                IsotopicDistribution dist        = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8);
                double[]             mz          = dist.Masses.Select(v => v.ToMz(1)).ToArray();
                double[]             intensities = dist.Intensities.Select(v => v * intensity * intensityMultipliers[s]).ToArray();

                // add the scan
                scans[s] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: s + 1, msnOrder: 1, isCentroid: true,
                                          polarity: Polarity.Positive, retentionTime: 1.0 + s / 10.0, scanWindowRange: new MzRange(400, 1600), scanFilter: "f",
                                          mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (s + 1));
            }

            // write the .mzML
            IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans),
                                                                          Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite), false);

            // set up spectra file info
            SpectraFileInfo file1 = new SpectraFileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, fileToWrite), "", 0, 0, 0);

            // create some PSMs
            var pg = new ProteinGroup("MyProtein", "gene", "org");

            Identification id1 = new Identification(file1, peptide, peptide,
                                                    new Proteomics.AminoAcidPolymer.Peptide(peptide).MonoisotopicMass, 1.3 + 0.001, 1, new List <ProteinGroup> {
                pg
            });

            // create the FlashLFQ engine
            FlashLFQEngine engine = new FlashLFQEngine(new List <Identification> {
                id1
            });

            // run the engine
            var results = engine.Run();
            ChromatographicPeak peak = results.Peaks.First().Value.First();

            Assert.That(peak.Apex.RetentionTime == 1.3);
            Assert.That(peak.SplitRT == 1.6);
            Assert.That(!peak.IsotopicEnvelopes.Any(p => p.RetentionTime > 1.6));
            Assert.That(peak.IsotopicEnvelopes.Count == 6);
        }
Beispiel #7
0
        public static void ThresholdProbability()
        {
            ChemicalFormula formulaA = ChemicalFormula.ParseFormula("CO");

            // Only the principal isotopes have joint probability of 0.5! So one result when calcuating isotopic distribution
            var a = IsotopicDistribution.GetDistribution(formulaA, 0.0001, 0.5);

            Assert.AreEqual(1, a.Masses.Count());
            Assert.IsTrue(Math.Abs((PeriodicTable.GetElement("C").PrincipalIsotope.AtomicMass + PeriodicTable.GetElement("O").PrincipalIsotope.AtomicMass - a.Masses.First())) < 1e-9);
        }
Beispiel #8
0
        public static void I0j1()
        {
            ChemicalFormula formula = (ChemicalFormula.ParseFormula("C50O50"));

            IsotopicDistribution.GetDistribution(formula, 0.01, 0.1);

            IsotopicDistribution.GetDistribution(formula, 0.01, 0.5);

            IsotopicDistribution.GetDistribution(formula, 0.01, 0.75);
        }
Beispiel #9
0
        public static void IsoTest()
        {
            ChemicalFormula formula = ChemicalFormula.ParseFormula("C5H8NO");

            IsotopicDistribution d = IsotopicDistribution.GetDistribution(formula, Math.Pow(2, -14));

            Assert.AreEqual(324, d.Intensities.Count());

            d = IsotopicDistribution.GetDistribution(formula, Math.Pow(2, -1));

            Assert.AreEqual(17, d.Intensities.Count());
        }
        public static double CalculateProteoformMass(string sequence, List <Ptm> ptm_combination)
        {
            if (Sweet.lollipop.theoretical_database.aaIsotopeMassList == null)
            {
                Sweet.lollipop.theoretical_database.populate_aa_mass_dictionary();
            }

            if (!Sweet.lollipop.most_abundant_mass)
            {
                double        proteoformMass = 18.010565; // start with water
                char[]        aminoAcids     = sequence.ToCharArray();
                List <double> aaMasses       = new List <double>();
                for (int i = 0; i < sequence.Length; i++)
                {
                    if (Sweet.lollipop.theoretical_database.aaIsotopeMassList.ContainsKey(aminoAcids[i]))
                    {
                        aaMasses.Add(Sweet.lollipop.theoretical_database.aaIsotopeMassList[aminoAcids[i]]);
                    }
                }

                return(proteoformMass + aaMasses.Sum() +
                       ptm_combination.Sum(p => (double)p.modification.MonoisotopicMass));
            }

            //if most abundant mass, calculate iso distrubution, set modified/unmodified masses to most abundant.
            var formula = new Proteomics.AminoAcidPolymer.Peptide(sequence).GetChemicalFormula();

            // append mod formulas
            foreach (var mod in ptm_combination)
            {
                var modCf = mod.modification.ChemicalFormula;

                if (modCf != null)
                {
                    formula.Add(modCf);
                }
            }

            // Calculate isotopic distribution of the full peptide
            var dist = IsotopicDistribution.GetDistribution(formula, 0.1, 1e-12);

            double[] masses        = dist.Masses.ToArray();
            double[] intensities   = dist.Intensities.ToArray();
            double   max           = intensities.Max();
            int      modeMassIndex = Array.IndexOf(intensities, max);

            return(masses[modeMassIndex]);
        }
Beispiel #11
0
        public void TestCoIsolation()
        {
            Peptide pep1 = new Peptide("AAAAAA");
            Peptide pep2 = new Peptide("AAA[H]AAA");

            var dist1 = IsotopicDistribution.GetDistribution(pep1.GetChemicalFormula(), 0.1, 0.01);

            var dist2 = IsotopicDistribution.GetDistribution(pep2.GetChemicalFormula(), 0.1, 0.01);

            MsDataScan[] Scans          = new MsDataScan[2];
            double[]     ms1intensities = new double[] { 0.8, 0.8, 0.2, 0.02, 0.2, 0.02 };
            double[]     ms1mzs         = dist1.Masses.Concat(dist2.Masses).OrderBy(b => b).Select(b => b.ToMz(1)).ToArray();

            double selectedIonMz = ms1mzs[1];

            MzSpectrum MS1 = new MzSpectrum(ms1mzs, ms1intensities, false);

            Scans[0] = new MsDataScan(MS1, 1, 1, false, Polarity.Positive, 1.0, new MzRange(300, 2000), "first spectrum", MZAnalyzerType.Unknown, MS1.SumOfAllY, null, null, null);

            // Horrible fragmentation, but we don't care about this!
            double[]   ms2intensities = new double[] { 1000 };
            double[]   ms2mzs         = new double[] { 1000 };
            MzSpectrum MS2            = new MzSpectrum(ms2mzs, ms2intensities, false);
            double     isolationMZ    = selectedIonMz;

            Scans[1] = new MsDataScan(MS2, 2, 2, false, Polarity.Positive, 2.0, new MzRange(100, 1500), "second spectrum", MZAnalyzerType.Unknown, MS2.SumOfAllY, null, null, null, selectedIonMz, null, null, isolationMZ, 2.5, DissociationType.HCD, 1, null);

            var myMsDataFile = new FakeMsDataFile(Scans);

            var cool = myMsDataFile.GetAllScansList().Last();

            int       maxAssumedChargeState = 1;
            Tolerance massTolerance         = Tolerance.ParseToleranceString("10 PPM");

            var isolatedMasses = cool.GetIsolatedMassesAndCharges(myMsDataFile.GetOneBasedScan(cool.OneBasedPrecursorScanNumber.Value).MassSpectrum, 1, maxAssumedChargeState, 10, 5).ToList();

            Assert.AreEqual(2, isolatedMasses.Count);
            Assert.AreEqual(2, isolatedMasses.Count(b => b.charge == 1));
            Assert.AreEqual(pep1.MonoisotopicMass, isolatedMasses.Select(b => b.peaks.First().Item1.ToMass(b.charge)).Min(), 1e-9);
            Assert.AreEqual(pep2.MonoisotopicMass, isolatedMasses.Select(b => b.peaks.First().Item1.ToMass(b.charge)).Max(), 1e-9);
            Assert.AreEqual(pep1.MonoisotopicMass, isolatedMasses.Select(b => b.monoisotopicMass.ToMz(b.charge).ToMass(b.charge)).Min(), 1e-9);
            Assert.AreEqual(pep2.MonoisotopicMass, isolatedMasses.Select(b => b.monoisotopicMass.ToMz(b.charge).ToMass(b.charge)).Max(), 1e-9);
        }
Beispiel #12
0
        private static void BenchmarkIsotopicDistribution()
        {
            Console.WriteLine("Starting benchmark BenchmarkIsotopicDistribution");

            int numRepetitions = 100;

            Stopwatch stopWatch = new Stopwatch();

            var    a = ChemicalFormula.ParseFormula("H100C100N100O100S100");
            double b = 0;

            stopWatch.Restart();
            for (int i = 0; i < numRepetitions; i++)
            {
                b += IsotopicDistribution.GetDistribution(a).Intensities.First();
            }
            stopWatch.Stop();
            Console.WriteLine("Time for generating isotopic distributions: " + stopWatch.Elapsed + " a = " + a);

            Console.WriteLine("Benchmark BenchmarkIsotopicDistribution finished");
        }
Beispiel #13
0
        static IsoDecon()
        {
            //AVERAGINE
            const double averageC = 4.9384;
            const double averageH = 7.7583;
            const double averageO = 1.4773;
            const double averageN = 1.3577;
            const double averageS = 0.0417;

            const double fineRes = 0.125;
            const double minRes  = 1e-8;

            for (int i = 0; i < numAveraginesToGenerate; i++)
            {
                double averagineMultiplier = (i + 1) / 2.0;
                //Console.Write("numAveragines = " + numAveragines);
                ChemicalFormula chemicalFormula = new ChemicalFormula();
                chemicalFormula.Add("C", Convert.ToInt32(averageC * averagineMultiplier));
                chemicalFormula.Add("H", Convert.ToInt32(averageH * averagineMultiplier));
                chemicalFormula.Add("O", Convert.ToInt32(averageO * averagineMultiplier));
                chemicalFormula.Add("N", Convert.ToInt32(averageN * averagineMultiplier));
                chemicalFormula.Add("S", Convert.ToInt32(averageS * averagineMultiplier));

                {
                    var chemicalFormulaReg  = chemicalFormula;
                    IsotopicDistribution ye = IsotopicDistribution.GetDistribution(chemicalFormulaReg, fineRes, minRes);
                    var masses      = ye.Masses.ToArray();
                    var intensities = ye.Intensities.ToArray();
                    Array.Sort(intensities, masses);
                    Array.Reverse(intensities);
                    Array.Reverse(masses);

                    mostIntenseMasses[i]  = masses[0];
                    diffToMonoisotopic[i] = masses[0] - chemicalFormulaReg.MonoisotopicMass;
                    allMasses[i]          = masses;
                    allIntensities[i]     = intensities;
                }
            }
        }
Beispiel #14
0
        public static void TestIsotopicDistribution3()
        {
            ChemicalFormula formulaA = ChemicalFormula.ParseFormula("CO");

            // Distinguish between O and C isotope masses
            var a = IsotopicDistribution.GetDistribution(formulaA, 0.0001);

            Assert.AreEqual(6, a.Masses.Count());

            // Do not distinguish between O and C isotope masses
            IsotopicDistribution.GetDistribution(formulaA, 0.001);

            // Do not distinguish between O and C isotope masses
            var b = IsotopicDistribution.GetDistribution(formulaA);

            Assert.AreEqual(4, b.Masses.Count());

            IsotopicDistribution.GetDistribution(formulaA, 0.1);

            PhysicalObjectWithChemicalFormula formulaB = new PhysicalObjectWithChemicalFormula("CO");

            IsotopicDistribution.GetDistribution(formulaB.ThisChemicalFormula, 1);
        }
        protected override MetaMorpheusEngineResults RunSpecific()
        {
            Status("Extracting data points:");
            // The final training point list

            int numMs1MassChargeCombinationsConsidered = 0;
            int numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            int numMs2MassChargeCombinationsConsidered = 0;
            int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            List <LabeledMs1DataPoint> Ms1List = new List <LabeledMs1DataPoint>();
            List <LabeledMs2DataPoint> Ms2List = new List <LabeledMs2DataPoint>();

            int numIdentifications = goodIdentifications.Count;

            // Loop over identifications

            HashSet <string> sequences = new HashSet <string>();

            object lockObj  = new object();
            object lockObj2 = new object();

            Parallel.ForEach(Partitioner.Create(0, numIdentifications), fff =>
            {
                for (int matchIndex = fff.Item1; matchIndex < fff.Item2; matchIndex++)
                {
                    PeptideSpectralMatch identification = goodIdentifications[matchIndex];

                    // Each identification has an MS2 spectrum attached to it.
                    int ms2scanNumber = identification.ScanNumber;
                    int peptideCharge = identification.ScanPrecursorCharge;
                    if (identification.FullSequence == null)
                    {
                        continue;
                    }

                    var representativeSinglePeptide = identification.CompactPeptides.First().Value.Item2.First();

                    // Get the peptide, don't forget to add the modifications!!!!
                    var SequenceWithChemicalFormulas = representativeSinglePeptide.SequenceWithChemicalFormulas;
                    if (SequenceWithChemicalFormulas == null || representativeSinglePeptide.allModsOneIsNterminus.Any(b => b.Value.neutralLosses.Count != 1 || b.Value.neutralLosses.First() != 0))
                    {
                        continue;
                    }
                    Proteomics.Peptide coolPeptide = new Proteomics.Peptide(SequenceWithChemicalFormulas);

                    var ms2tuple = SearchMS2Spectrum(myMsDataFile.GetOneBasedScan(ms2scanNumber) as IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> >, coolPeptide, peptideCharge, identification);

                    // If MS2 has low evidence for peptide, skip and go to next one
                    if (ms2tuple.Item4 < numFragmentsNeededForEveryIdentification)
                    {
                        continue;
                    }

                    lock (lockObj2)
                    {
                        Ms2List.AddRange(ms2tuple.Item1);
                        numMs2MassChargeCombinationsConsidered += ms2tuple.Item2;
                        numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms2tuple.Item3;
                        if (sequences.Contains(identification.FullSequence))
                        {
                            continue; // Do not search same sequence multiple times in MS1 scans
                        }
                        sequences.Add(identification.FullSequence);
                    }

                    // Calculate isotopic distribution of the full peptide
                    var dist = IsotopicDistribution.GetDistribution(coolPeptide.GetChemicalFormula(), fineResolutionForIsotopeDistCalculation, 0.001);

                    double[] theoreticalMasses      = dist.Masses.ToArray();
                    double[] theoreticalIntensities = dist.Intensities.ToArray();

                    Array.Sort(theoreticalIntensities, theoreticalMasses, Comparer <double> .Create((x, y) => y.CompareTo(x)));

                    var ms1tupleBack = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, -1, peptideCharge, identification);

                    var ms1tupleForward = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, 1, peptideCharge, identification);

                    lock (lockObj)
                    {
                        Ms1List.AddRange(ms1tupleBack.Item1);
                        numMs1MassChargeCombinationsConsidered += ms1tupleBack.Item2;
                        numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleBack.Item3;
                        Ms1List.AddRange(ms1tupleForward.Item1);
                        numMs1MassChargeCombinationsConsidered += ms1tupleForward.Item2;
                        numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleForward.Item3;
                    }
                }
            });

            return(new DataPointAquisitionResults(this,
                                                  Ms1List,
                                                  Ms2List,
                                                  numMs1MassChargeCombinationsConsidered,
                                                  numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks,
                                                  numMs2MassChargeCombinationsConsidered,
                                                  numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks
                                                  ));
        }
Beispiel #16
0
        public static void TestFlashLfqAdvancedProteinQuant()
        {
            List <string> filesToWrite = new List <string> {
                "mzml_1", "mzml_2"
            };
            List <string> pepSequences = new List <string> {
                "PEPTIDE", "MYPEPTIDE", "VVVVVPEPTIDE"
            };

            double[,] amounts = new double[2, 3] {
                { 1000000, 1000000, 1000000 },
                { 2000000, 2000000, 900000 }
            };
            Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, @"elements.dat"));

            // generate mzml files (3 peptides each)
            for (int f = 0; f < filesToWrite.Count; f++)
            {
                // 1 MS1 scan per peptide
                MsDataScan[] scans = new MsDataScan[3];

                for (int p = 0; p < pepSequences.Count; p++)
                {
                    ChemicalFormula      cf          = new Proteomics.AminoAcidPolymer.Peptide(pepSequences[p]).GetChemicalFormula();
                    IsotopicDistribution dist        = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8);
                    double[]             mz          = dist.Masses.Select(v => v.ToMz(1)).ToArray();
                    double[]             intensities = dist.Intensities.Select(v => v * amounts[f, p]).ToArray();

                    // add the scan
                    scans[p] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: p + 1, msnOrder: 1, isCentroid: true,
                                              polarity: Polarity.Positive, retentionTime: 1.0 + (p / 10.0), scanWindowRange: new MzRange(400, 1600), scanFilter: "f",
                                              mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (p + 1));
                }

                // write the .mzML
                IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans),
                                                                              Path.Combine(TestContext.CurrentContext.TestDirectory, filesToWrite[f] + ".mzML"), false);
            }

            // set up spectra file info
            SpectraFileInfo file1 = new SpectraFileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, filesToWrite[0] + ".mzML"), "a", 0, 0, 0);
            SpectraFileInfo file2 = new SpectraFileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, filesToWrite[1] + ".mzML"), "a", 1, 0, 0);

            // create some PSMs
            var            pg  = new ProteinGroup("MyProtein", "gene", "org");
            Identification id1 = new Identification(file1, "PEPTIDE", "PEPTIDE", 799.35996, 1.01, 1, new List <ProteinGroup> {
                pg
            });
            Identification id2 = new Identification(file1, "MYPEPTIDE", "MYPEPTIDE", 1093.46377, 1.11, 1, new List <ProteinGroup> {
                pg
            });
            Identification id3 = new Identification(file1, "VVVVVPEPTIDE", "VVVVVPEPTIDE", 1294.70203, 1.21, 1, new List <ProteinGroup> {
                pg
            });

            Identification id4 = new Identification(file2, "PEPTIDE", "PEPTIDE", 799.35996, 1.01, 1, new List <ProteinGroup> {
                pg
            });
            Identification id5 = new Identification(file2, "MYPEPTIDE", "MYPEPTIDE", 1093.46377, 1.11, 1, new List <ProteinGroup> {
                pg
            });
            Identification id6 = new Identification(file2, "VVVVVPEPTIDE", "VVVVVPEPTIDE", 1294.70203, 1.21, 1, new List <ProteinGroup> {
                pg
            });

            // create the FlashLFQ engine
            FlashLFQEngine engine = new FlashLFQEngine(new List <Identification> {
                id1, id2, id3, id4, id5, id6
            }, normalize: false, advancedProteinQuant: true);

            // run the engine
            var results = engine.Run();

            // third peptide should be low-weighted
            // protein should be ~sum of first two peptide intensities (a little lower, because some smaller isotope peaks get skipped)
            double file1ProteinIntensity = results.ProteinGroups["MyProtein"].GetIntensity(file1);

            Assert.That(file1ProteinIntensity < 2e6);
            Assert.That(file1ProteinIntensity > 1e6);

            double file2ProteinIntensity = results.ProteinGroups["MyProtein"].GetIntensity(file2);

            Assert.That(file2ProteinIntensity < 4e6);
            Assert.That(file2ProteinIntensity > 3e6);
        }
        static DeconvolutionEngine()
        {
            // AVERAGINE
            const double averageC = 5.0359;
            const double averageH = 7.9273;
            const double averageO = 1.52996;
            const double averageN = 1.3608;
            const double averageS = 0.0342;

            const double fineRes = 0.125;
            const double minRes  = 1e-8;

            double maxMass = 100000;
            double averagineDaltonResolution = 50.0;

            double averagineMass = PeriodicTable.GetElement("C").PrincipalIsotope.AtomicMass *averageC
                                   + PeriodicTable.GetElement("H").PrincipalIsotope.AtomicMass *averageH
                                   + PeriodicTable.GetElement("O").PrincipalIsotope.AtomicMass *averageO
                                   + PeriodicTable.GetElement("N").PrincipalIsotope.AtomicMass *averageN
                                   + PeriodicTable.GetElement("S").PrincipalIsotope.AtomicMass *averageS;

            int numAveragines = (int)Math.Ceiling(maxMass / averagineDaltonResolution) + 1;

            allMasses          = new double[numAveragines][];
            allIntensities     = new double[numAveragines][];
            mostIntenseMasses  = new double[numAveragines];
            diffToMonoisotopic = new double[numAveragines];

            for (int i = 1; i < numAveragines; i++)
            {
                double mass = i * averagineDaltonResolution;

                double averagines = mass / averagineMass;

                if (mass < 50)
                {
                    continue;
                }

                ChemicalFormula chemicalFormula = new ChemicalFormula();
                chemicalFormula.Add("C", Convert.ToInt32(averageC * averagines));
                chemicalFormula.Add("H", Convert.ToInt32(averageH * averagines));
                chemicalFormula.Add("O", Convert.ToInt32(averageO * averagines));
                chemicalFormula.Add("N", Convert.ToInt32(averageN * averagines));
                chemicalFormula.Add("S", Convert.ToInt32(averageS * averagines));

                var chemicalFormulaReg = chemicalFormula;
                IsotopicDistribution isotopicDistribution = IsotopicDistribution.GetDistribution(chemicalFormulaReg, fineRes, minRes);
                var masses               = isotopicDistribution.Masses.ToArray();
                var intensities          = isotopicDistribution.Intensities.ToArray();
                var mostIntense          = intensities.Max();
                int indOfMostIntensePeak = Array.IndexOf(intensities, mostIntense);

                for (int j = 0; j < intensities.Length; j++)
                {
                    intensities[j] /= mostIntense;
                }

                mostIntenseMasses[i]  = masses[indOfMostIntensePeak];
                diffToMonoisotopic[i] = masses[indOfMostIntensePeak] - chemicalFormulaReg.MonoisotopicMass;
                allMasses[i]          = masses;
                allIntensities[i]     = intensities;
            }
        }
        private (List <LabeledMs2DataPoint>, int, int, int) SearchMS2Spectrum(IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> > ms2DataScan, Proteomics.Peptide peptide, int peptideCharge, PeptideSpectralMatch identification)
        {
            List <LabeledMs2DataPoint> result          = new List <LabeledMs2DataPoint>();
            int numMs2MassChargeCombinationsConsidered = 0;
            int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            int numFragmentsIdentified = 0;

            if (ms2DataScan.MassSpectrum.Size == 0)
            {
                return(result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
            }

            // Key: mz value, Value: error
            var addedPeaks = new Dictionary <double, double>();

            var countForThisMS2  = 0;
            var countForThisMS2a = 0;

            var scanWindowRange = ms2DataScan.ScanWindowRange;

            IHasChemicalFormula[] fragmentList = peptide.Fragment(fragmentTypesForCalibration, true).OfType <IHasChemicalFormula>().ToArray();

            foreach (var fragment in fragmentList)
            {
                bool     fragmentIdentified    = false;
                bool     computedIsotopologues = false;
                double[] masses      = new double[0];
                double[] intensities = new double[0];
                // First look for monoisotopic masses, do not compute distribution spectrum!

                for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
                {
                    var monoisotopicMZ = fragment.MonoisotopicMass.ToMz(chargeToLookAt);
                    if (monoisotopicMZ > scanWindowRange.Maximum)
                    {
                        continue;
                    }
                    if (monoisotopicMZ < scanWindowRange.Minimum)
                    {
                        break;
                    }
                    var closestPeakMZ = ms2DataScan.MassSpectrum.GetClosestPeakXvalue(monoisotopicMZ);

                    if (mzToleranceForMs2Search.Within(closestPeakMZ.Value, monoisotopicMZ) && !computedIsotopologues)
                    {
                        var dist = IsotopicDistribution.GetDistribution(fragment.ThisChemicalFormula, fineResolutionForIsotopeDistCalculation, 0.001);

                        masses      = dist.Masses.ToArray();
                        intensities = dist.Intensities.ToArray();

                        Array.Sort(intensities, masses, Comparer <double> .Create((x, y) => y.CompareTo(x)));
                        computedIsotopologues = true;
                        break;
                    }
                }

                if (computedIsotopologues)
                {
                    bool startingToAdd = false;
                    for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
                    {
                        if (masses.First().ToMz(chargeToLookAt) > scanWindowRange.Maximum)
                        {
                            continue;
                        }
                        if (masses.Last().ToMz(chargeToLookAt) < scanWindowRange.Minimum)
                        {
                            break;
                        }
                        var trainingPointsToAverage = new List <LabeledMs2DataPoint>();
                        foreach (double a in masses)
                        {
                            double theMZ = a.ToMz(chargeToLookAt);
                            var    npwr  = ms2DataScan.MassSpectrum.NumPeaksWithinRange(mzToleranceForMs2Search.GetMinimumValue(theMZ), mzToleranceForMs2Search.GetMaximumValue(theMZ));
                            if (npwr == 0)
                            {
                                break;
                            }
                            numMs2MassChargeCombinationsConsidered++;
                            if (npwr > 1)
                            {
                                numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks++;
                                continue;
                            }

                            var closestPeakIndex = ms2DataScan.MassSpectrum.GetClosestPeakIndex(theMZ);
                            var closestPeakMZ    = ms2DataScan.MassSpectrum.XArray[closestPeakIndex.Value];

                            if (!addedPeaks.ContainsKey(closestPeakMZ))
                            {
                                addedPeaks.Add(closestPeakMZ, Math.Abs(closestPeakMZ - theMZ));
                                trainingPointsToAverage.Add(new LabeledMs2DataPoint(closestPeakMZ, double.NaN, double.NaN, double.NaN, Math.Log(ms2DataScan.MassSpectrum.YArray[closestPeakIndex.Value]), theMZ, null));
                            }
                        }
                        // If started adding and suddnely stopped, go to next one, no need to look at higher charges
                        if (trainingPointsToAverage.Count == 0 && startingToAdd)
                        {
                            break;
                        }
                        if (trainingPointsToAverage.Count < Math.Min(minMS2isotopicPeaksNeededForConfirmedIdentification, intensities.Count()))
                        {
                        }
                        else
                        {
                            startingToAdd = true;
                            if (!fragmentIdentified)
                            {
                                fragmentIdentified      = true;
                                numFragmentsIdentified += 1;
                            }

                            countForThisMS2 += trainingPointsToAverage.Count;
                            countForThisMS2a++;
                            result.Add(new LabeledMs2DataPoint(trainingPointsToAverage.Select(b => b.mz).Average(),
                                                               ms2DataScan.RetentionTime,
                                                               Math.Log(ms2DataScan.TotalIonCurrent),
                                                               ms2DataScan.InjectionTime.HasValue ? Math.Log(ms2DataScan.InjectionTime.Value) : double.NaN,
                                                               trainingPointsToAverage.Select(b => b.logIntensity).Average(),
                                                               trainingPointsToAverage.Select(b => b.expectedMZ).Average(),
                                                               identification));
                        }
                    }
                }
            }

            return(result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
        }
        public static void TestCoIsolation()
        {
            Protease protease = new Protease("CustProtease", new List <Tuple <string, TerminusType> > {
                new Tuple <string, TerminusType>("K", TerminusType.C)
            }, new List <Tuple <string, TerminusType> >(), CleavageSpecificity.Full, null, null, null);

            ProteaseDictionary.Dictionary.Add(protease.Name, protease);
            CommonParameters CommonParameters = new CommonParameters(scoreCutoff: 1, deconvolutionIntensityRatio: 50, digestionParams: new DigestionParams(protease.Name, minPeptideLength: 1));

            var variableModifications = new List <ModificationWithMass>();
            var fixedModifications    = new List <ModificationWithMass>();
            var proteinList           = new List <Protein> {
                new Protein("MNNNKNDNK", null)
            };

            var searchModes = new SinglePpmAroundZeroSearchMode(5);

            Proteomics.AminoAcidPolymer.Peptide pep1 = new Proteomics.AminoAcidPolymer.Peptide("NNNK");
            Proteomics.AminoAcidPolymer.Peptide pep2 = new Proteomics.AminoAcidPolymer.Peptide("NDNK");

            var dist1 = IsotopicDistribution.GetDistribution(pep1.GetChemicalFormula(), 0.1, 0.01);

            var dist2 = IsotopicDistribution.GetDistribution(pep2.GetChemicalFormula(), 0.1, 0.01);

            MsDataScan[] Scans          = new MsDataScan[2];
            double[]     ms1intensities = new double[] { 0.8, 0.8, 0.2, 0.02, 0.2, 0.02 };
            double[]     ms1mzs         = dist1.Masses.Concat(dist2.Masses).OrderBy(b => b).Select(b => b.ToMz(1)).ToArray();

            double selectedIonMz = ms1mzs[1];

            MzSpectrum MS1 = new MzSpectrum(ms1mzs, ms1intensities, false);

            Scans[0] = new MsDataScan(MS1, 1, 1, false, Polarity.Positive, 1.0, new MzRange(300, 2000), "first spectrum", MZAnalyzerType.Unknown, MS1.SumOfAllY, null, null, "scan=1");

            double[]   ms2intensities = new double[] { 1, 1, 1, 1, 1 };
            double[]   ms2mzs         = new double[] { 146.106.ToMz(1), 228.086.ToMz(1), 229.07.ToMz(1), 260.148.ToMz(1), 342.129.ToMz(1) };
            MzSpectrum MS2            = new MzSpectrum(ms2mzs, ms2intensities, false);
            double     isolationMZ    = selectedIonMz;

            Scans[1] = new MsDataScan(MS2, 2, 2, false, Polarity.Positive, 2.0, new MzRange(100, 1500), "second spectrum", MZAnalyzerType.Unknown, MS2.SumOfAllY, null, null, "scan=2", selectedIonMz, null, null, isolationMZ, 2.5, DissociationType.HCD, 1, null);

            var myMsDataFile = new MsDataFile(Scans, null);

            bool      DoPrecursorDeconvolution           = true;
            bool      UseProvidedPrecursorInfo           = true;
            double    DeconvolutionIntensityRatio        = 50;
            int       DeconvolutionMaxAssumedChargeState = 10;
            Tolerance DeconvolutionMassTolerance         = new PpmTolerance(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, DoPrecursorDeconvolution, UseProvidedPrecursorInfo, DeconvolutionIntensityRatio, DeconvolutionMaxAssumedChargeState, DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            List <ProductType> lp = new List <ProductType> {
                ProductType.B, ProductType.Y
            };

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, lp, searchModes, CommonParameters, new List <string>()).Run();

            // Two matches for this single scan! Corresponding to two co-isolated masses
            Assert.AreEqual(2, allPsmsArray.Length);

            Assert.IsTrue(allPsmsArray[0].Score > 1);
            Assert.AreEqual(2, allPsmsArray[0].ScanNumber);

            var ojdfkj = (SequencesToActualProteinPeptidesEngineResults) new SequencesToActualProteinPeptidesEngine(new List <PeptideSpectralMatch>
            {
                allPsmsArray[0], allPsmsArray[1]
            }, proteinList, fixedModifications, variableModifications, lp, new List <DigestionParams> {
                CommonParameters.DigestionParams
            }, CommonParameters.ReportAllAmbiguity, CommonParameters, new List <string>()).Run();

            foreach (var huh in allPsmsArray)
            {
                if (huh != null)
                {
                    huh.MatchToProteinLinkedPeptides(ojdfkj.CompactPeptideToProteinPeptideMatching);
                }
            }

            Assert.AreEqual("NNNK", allPsmsArray[0].BaseSequence);
            Assert.AreEqual("NDNK", allPsmsArray[1].BaseSequence);
        }
        public static void TestProteinQuantFileHeaders(bool hasDefinedExperimentalDesign, int bioreps, int fractions, int techreps)
        {
            // create the unit test directory
            string unitTestFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestProteinQuantFileHeaders");

            Directory.CreateDirectory(unitTestFolder);

            List <SpectraFileInfo> fileInfos = new List <SpectraFileInfo>();
            string peptide      = "PEPTIDE";
            double ionIntensity = 1e6;
            string condition    = hasDefinedExperimentalDesign ? "TestCondition" : "";

            // create the protein database
            Protein prot   = new Protein(peptide, @"");
            string  dbName = Path.Combine(unitTestFolder, "testDB.fasta");

            UsefulProteomicsDatabases.ProteinDbWriter.WriteFastaDatabase(new List <Protein> {
                prot
            }, dbName, ">");

            // create the .mzML files to search/quantify
            for (int b = 0; b < bioreps; b++)
            {
                for (int f = 0; f < fractions; f++)
                {
                    for (int r = 0; r < techreps; r++)
                    {
                        string fileToWrite = "file_" + "b" + b + "f" + f + "r" + r + ".mzML";

                        // generate mzml file
                        MsDataScan[] scans = new MsDataScan[2];

                        // create the MS1 scan
                        ChemicalFormula      cf          = new Proteomics.AminoAcidPolymer.Peptide(peptide).GetChemicalFormula();
                        IsotopicDistribution dist        = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8);
                        double[]             mz          = dist.Masses.Select(v => v.ToMz(1)).ToArray();
                        double[]             intensities = dist.Intensities.Select(v => v * ionIntensity).ToArray();

                        scans[0] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: 1, msnOrder: 1, isCentroid: true,
                                                  polarity: Polarity.Positive, retentionTime: 1.0, scanWindowRange: new MzRange(400, 1600), scanFilter: "f",
                                                  mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=1");

                        // create the MS2 scan
                        var            pep   = new PeptideWithSetModifications(peptide, new Dictionary <string, Proteomics.Modification>());
                        List <Product> frags = new List <Product>();
                        pep.Fragment(DissociationType.HCD, FragmentationTerminus.Both, frags);
                        double[] mz2          = frags.Select(v => v.NeutralMass.ToMz(1)).ToArray();
                        double[] intensities2 = frags.Select(v => 1e6).ToArray();

                        scans[1] = new MsDataScan(massSpectrum: new MzSpectrum(mz2, intensities2, false), oneBasedScanNumber: 2, msnOrder: 2, isCentroid: true,
                                                  polarity: Polarity.Positive, retentionTime: 1.01, scanWindowRange: new MzRange(100, 1600), scanFilter: "f",
                                                  mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=2", selectedIonMz: pep.MonoisotopicMass.ToMz(1),
                                                  selectedIonChargeStateGuess: 1, selectedIonIntensity: 1e6, isolationMZ: pep.MonoisotopicMass.ToMz(1), isolationWidth: 1.5, dissociationType: DissociationType.HCD,
                                                  oneBasedPrecursorScanNumber: 1, selectedIonMonoisotopicGuessMz: pep.MonoisotopicMass.ToMz(1), hcdEnergy: "35");

                        // write the .mzML
                        string fullPath = Path.Combine(unitTestFolder, fileToWrite);
                        IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(
                            new MsDataFile(scans, new SourceFile(@"scan number only nativeID format", "mzML format", null, "SHA-1", @"C:\fake.mzML", null)),
                            fullPath, false);

                        var spectraFileInfo = new SpectraFileInfo(fullPath, condition, b, r, f);
                        fileInfos.Add(spectraFileInfo);
                    }
                }
            }

            // write the experimental design for this quantification test
            if (hasDefinedExperimentalDesign)
            {
                ExperimentalDesign.WriteExperimentalDesignToFile(fileInfos);
            }

            // run the search/quantification
            SearchTask task = new SearchTask();

            task.RunTask(unitTestFolder, new List <DbForTask> {
                new DbForTask(dbName, false)
            }, fileInfos.Select(p => p.FullFilePathWithExtension).ToList(), "");

            // read in the protein quant results
            Assert.That(File.Exists(Path.Combine(unitTestFolder, "AllQuantifiedProteinGroups.tsv")));
            var lines = File.ReadAllLines(Path.Combine(unitTestFolder, "AllQuantifiedProteinGroups.tsv"));

            // check the intensity column headers
            var splitHeader            = lines[0].Split(new char[] { '\t' }).ToList();
            var intensityColumnHeaders = splitHeader.Where(p => p.Contains("Intensity", StringComparison.OrdinalIgnoreCase)).ToList();

            Assert.That(intensityColumnHeaders.Count == 2);

            if (!hasDefinedExperimentalDesign)
            {
                Assert.That(intensityColumnHeaders[0] == "Intensity_file_b0f0r0");
                Assert.That(intensityColumnHeaders[1] == "Intensity_file_b1f0r0");
            }
            else
            {
                Assert.That(intensityColumnHeaders[0] == "Intensity_TestCondition_1");
                Assert.That(intensityColumnHeaders[1] == "Intensity_TestCondition_2");
            }

            // check the protein intensity values
            int    ind1       = splitHeader.IndexOf(intensityColumnHeaders[0]);
            int    ind2       = splitHeader.IndexOf(intensityColumnHeaders[1]);
            double intensity1 = double.Parse(lines[1].Split(new char[] { '\t' })[ind1]);
            double intensity2 = double.Parse(lines[1].Split(new char[] { '\t' })[ind2]);

            Assert.That(intensity1 > 0);
            Assert.That(intensity2 > 0);
            Assert.That(intensity1 == intensity2);

            Directory.Delete(unitTestFolder, true);
        }
Beispiel #21
0
        private DataPointAquisitionResults GetDataPoints()
        {
            DataPointAquisitionResults res = new DataPointAquisitionResults()
            {
                Ms1List = new List <LabeledMs1DataPoint>()
            };

            // Set of peaks, identified by m/z and retention time. If a peak is in here, it means it has been a part of an accepted identification, and should be rejected
            var peaksAddedFromMS1HashSet = new HashSet <Tuple <double, int> >();

            foreach (SpectrumMatch identification in high_scoring_topdown_hits.OrderByDescending(h => h.score).ThenBy(h => h.pscore).ThenBy(h => h.reported_mass))
            {
                int scanNum = myMsDataFile.GetClosestOneBasedSpectrumNumber(identification.ms2_retention_time);

                List <int> scanNumbers = new List <int>()
                {
                    scanNum
                };
                int proteinCharge = identification.charge;

                Component matching_component = null;
                if (identification.filename != raw_file.filename) //if calibrating across files find component with matching mass and retention time
                {
                    //NOTE: only looking at components from same raw file... looking for components corresponding to td hits from any files w/ same br, fraction, condition however.
                    //look around theoretical mass of topdown hit identified proteoforms - 10 ppm and 5 minutes same br, tr, fraction, condition (same file!)
                    //if neucode labled, look for the light component mass (loaded in...)
                    List <Component> potential_matches = Sweet.lollipop.calibration_components.
                                                         Where(c => c.input_file.lt_condition == raw_file.lt_condition &&
                                                               c.input_file.biological_replicate == raw_file.biological_replicate &&
                                                               c.input_file.fraction == raw_file.fraction &&
                                                               c.input_file.technical_replicate == raw_file.technical_replicate).ToList();
                    if (potential_matches.Count > 0)
                    {
                        matching_component = potential_matches.Where(c =>
                                                                     Math.Abs(c.charge_states.OrderByDescending(s => s.intensity).First().mz_centroid.ToMass(c.charge_states.OrderByDescending(s => s.intensity).First().charge_count) - identification.theoretical_mass) * 1e6 / c.charge_states.OrderByDescending(s => s.intensity).First().mz_centroid.ToMass(c.charge_states.OrderByDescending(s => s.intensity).First().charge_count) < Sweet.lollipop.cali_mass_tolerance &&
                                                                     Math.Abs(c.rt_apex - identification.ms1_scan.RetentionTime) < Sweet.lollipop.cali_rt_tolerance).OrderBy(c => Math.Abs(c.charge_states.OrderByDescending(s => s.intensity).First().mz_centroid.ToMass(c.charge_states.OrderByDescending(s => s.intensity).First().charge_count) - identification.theoretical_mass)).FirstOrDefault();
                    }
                    else
                    {
                        matching_component = null;
                    }

                    if (matching_component == null)
                    {
                        continue;
                    }
                    scanNumbers.Clear();
                    //get scan numbers using retention time (if raw file is spliced, scan numbers change)
                    double rt = matching_component.min_rt;
                    while (Math.Round(rt, 2) <= Math.Round(matching_component.max_rt, 2))
                    {
                        int scanNumber = myMsDataFile.GetClosestOneBasedSpectrumNumber(rt);
                        scanNumbers.Add(scanNumber);
                        rt = myMsDataFile.GetOneBasedScan(scanNumber + 1).RetentionTime;
                    }
                    proteinCharge = matching_component.charge_states.OrderByDescending(c => c.intensity).First().charge_count;
                    if (matching_component.charge_states.Count == 1)
                    {
                        proteinCharge = identification.charge;
                    }
                }


                var formula = identification.GetChemicalFormula();
                if (formula == null)
                {
                    continue;
                }

                // Calculate isotopic distribution of the full peptide
                var dist = IsotopicDistribution.GetDistribution(formula, 0.1, 0.001);

                double[] masses      = dist.Masses.ToArray();
                double[] intensities = dist.Intensities.ToArray();

                Array.Sort(intensities, masses, Comparer <double> .Create((x, y) => y.CompareTo(x)));

                List <int> scansAdded = new List <int>();
                foreach (int scanNumber in scanNumbers)
                {
                    res.Ms1List.AddRange(SearchMS1Spectra(masses, intensities, scanNumber, -1, scansAdded, peaksAddedFromMS1HashSet, proteinCharge, identification));
                    res.Ms1List.AddRange(SearchMS1Spectra(masses, intensities, scanNumber, 1, scansAdded, peaksAddedFromMS1HashSet, proteinCharge, identification));
                }
            }
            return(res);
        }
        public void CatchIsotopicDistributionStuff()
        {
            ChemicalFormula formula = (ChemicalFormula.ParseFormula("C500O50H250N50"));

            IsotopicDistribution.GetDistribution(formula, 0.001, 1e-1, 1e-15);
        }
Beispiel #23
0
        public static void TestDeconvolution()
        {
            UsefulProteomicsDatabases.Loaders.LoadElements();

            // >sp|P49703|ARL4D_HUMAN ADP-ribosylation factor-like protein 4D OS=H**o sapiens OX=9606 GN=ARL4D PE=1 SV=2
            string sequence = "MGNHLTEMAPTASSFLPHFQALHVVVIGLDSAGKTSLLYRLKFKEFVQSVPTKGFNTEKIRVPLGGSRGITFQ" +
                              "VWDVGGQEKLRPLWRSYTRRTDGLVFVVDAAEAERLEEAKVELHRISRASDNQGVPVLVLANKQDQPGALSAA" +
                              "EVEKRLAVRELAAATLTHVQGCSAVDGLGLQQGLERLYEMILKRKKAARGGKKRR";
            int    charge = 15;
            double intensityMultiplier = 1e6;

            Proteomics.AminoAcidPolymer.Peptide baseSequence = new Proteomics.AminoAcidPolymer.Peptide(sequence);
            var formula = baseSequence.GetChemicalFormula();
            var isotopicDistribution = IsotopicDistribution.GetDistribution(formula, 0.125, 1e-8);

            double[] masses     = isotopicDistribution.Masses.ToArray();
            double[] abundances = isotopicDistribution.Intensities.ToArray();
            double   max        = abundances.Max();

            List <(double, double)> peaks = new List <(double, double)>();

            for (int i = 0; i < masses.Length; i++)
            {
                abundances[i] /= max;

                if (abundances[i] >= 0.05)
                {
                    peaks.Add((masses[i].ToMz(charge), abundances[i] * intensityMultiplier));
                }
            }

            Random r = new Random(1);

            for (int i = 0; i < 1000; i++)
            {
                double mz        = r.NextDouble() * 1200 + 400;
                double intensity = r.NextDouble() * 30000 + 30000;
                peaks.Add((mz, intensity));
            }

            peaks = peaks.OrderBy(p => p.Item1).ToList();

            var spectrum = new MzSpectrum(peaks.Select(p => p.Item1).ToArray(), peaks.Select(p => p.Item2).ToArray(), true);

            var engine = new DeconvolutionEngine(0, 0.4, 3, 0.4, 1.5, 5, 1, 60, 2);
            var envs   = engine.Deconvolute(spectrum, spectrum.Range).ToList();

            List <string> output = new List <string> {
                DeconvolutedEnvelope.TabDelimitedHeader
            };

            foreach (var env in envs)
            {
                env.SpectraFileName = "temp";
                output.Add(env.ToOutputString());
            }

            string path = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestPfmExplorerDeconOutput.tsv");

            File.WriteAllLines(path, output);

            var species = InputReaderParser.ReadSpeciesFromFile(path, out var errors);

            Assert.That(!errors.Any());
            Assert.That(species.Count > 0);
            Assert.That(species.All(p => p.DeconvolutionFeature != null));
            Assert.That(species.All(p => p.DeconvolutionFeature.AnnotatedEnvelopes != null && p.DeconvolutionFeature.AnnotatedEnvelopes.Count > 0));
            Assert.That(species.All(p => p.DeconvolutionFeature.AnnotatedEnvelopes.All(v => v.PeakMzs.Count > 0)));

            //Assert.That(species.Count == 1);
        }
Beispiel #24
0
        protected override MetaMorpheusEngineResults RunSpecific()
        {
            Status("Extracting data points:");
            // The final training point list

            int numMs1MassChargeCombinationsConsidered = 0;
            int numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            int numMs2MassChargeCombinationsConsidered = 0;
            int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            List <LabeledDataPoint> Ms1List = new List <LabeledDataPoint>();
            List <LabeledDataPoint> Ms2List = new List <LabeledDataPoint>();

            object lockObj  = new object();
            object lockObj2 = new object();

            int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile;

            int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray();
            Parallel.ForEach(threads, (matchIndex) =>
            {
                for (; matchIndex < GoodIdentifications.Count; matchIndex += maxThreadsPerFile)
                {
                    // Stop loop if canceled
                    if (GlobalVariables.StopLoops)
                    {
                        return;
                    }

                    PeptideSpectralMatch identification = GoodIdentifications[matchIndex];

                    // Each identification has an MS2 spectrum attached to it.
                    int ms2scanNumber = identification.ScanNumber;
                    int peptideCharge = identification.ScanPrecursorCharge;
                    if (identification.FullSequence == null || identification.BestMatchingPeptides.Any(p => p.Peptide.AllModsOneIsNterminus.Any(m => m.Value.ChemicalFormula == null)))
                    {
                        continue;
                    }

                    var representativeSinglePeptide = identification.BestMatchingPeptides.First().Peptide;

                    // Get the peptide, don't forget to add the modifications!!!!
                    var SequenceWithChemicalFormulas = representativeSinglePeptide.SequenceWithChemicalFormulas;
                    if (SequenceWithChemicalFormulas == null || representativeSinglePeptide.AllModsOneIsNterminus.Any(b => b.Value.NeutralLosses != null))
                    {
                        continue;
                    }

                    Peptide coolPeptide = new Peptide(SequenceWithChemicalFormulas);

                    var ms2tuple = SearchMS2Spectrum(MyMsDataFile.GetOneBasedScan(ms2scanNumber), identification);

                    lock (lockObj2)
                    {
                        Ms2List.AddRange(ms2tuple);
                    }

                    // Calculate isotopic distribution of the full peptide
                    var dist = IsotopicDistribution.GetDistribution(coolPeptide.GetChemicalFormula(), FineResolutionForIsotopeDistCalculation, 0.001);

                    double[] theoreticalMasses      = dist.Masses.ToArray();
                    double[] theoreticalIntensities = dist.Intensities.ToArray();

                    Array.Sort(theoreticalIntensities, theoreticalMasses, Comparer <double> .Create((x, y) => y.CompareTo(x)));

                    var ms1tupleBack = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, -1, peptideCharge, identification);

                    var ms1tupleForward = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, 1, peptideCharge, identification);

                    lock (lockObj)
                    {
                        Ms1List.AddRange(ms1tupleBack.Item1);
                        numMs1MassChargeCombinationsConsidered += ms1tupleBack.Item2;
                        numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleBack.Item3;
                        Ms1List.AddRange(ms1tupleForward.Item1);
                        numMs1MassChargeCombinationsConsidered += ms1tupleForward.Item2;
                        numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleForward.Item3;
                    }
                }
            });

            // datapoints are ordered because they were acquired in a parallized search and we want repeatable results
            return(new DataPointAquisitionResults(this,
                                                  GoodIdentifications,
                                                  Ms1List.OrderBy(p => p.ScanNumber).ThenBy(p => p.ExperimentalMz).ToList(),
                                                  Ms2List.OrderBy(p => p.ScanNumber).ThenBy(p => p.ExperimentalMz).ToList(),
                                                  numMs1MassChargeCombinationsConsidered,
                                                  numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks,
                                                  numMs2MassChargeCombinationsConsidered,
                                                  numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks
                                                  ));
        }
Beispiel #25
0
 public static void TestIsotopicDistribution2()
 {
     IsotopicDistribution.GetDistribution(ChemicalFormula.ParseFormula("AlO{16}"));
 }
Beispiel #26
0
        public static void TestFlashLfqMatchBetweenRunsProteinQuant()
        {
            List <string> filesToWrite = new List <string> {
                "mzml_1", "mzml_2"
            };
            List <string> pepSequences = new List <string> {
                "PEPTIDE", "PEPTIDEV", "PEPTIDEVV", "PEPTIDEVVV", "PEPTIDEVVVV"
            };
            double intensity = 1e6;

            double[] file1Rt = new double[] { 1.01, 1.02, 1.03, 1.04, 1.05 };
            double[] file2Rt = new double[] { 1.015, 1.030, 1.036, 1.050, 1.065 };

            Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, @"elements.dat"));

            // generate mzml files (5 peptides each)
            for (int f = 0; f < filesToWrite.Count; f++)
            {
                // 1 MS1 scan per peptide
                MsDataScan[] scans = new MsDataScan[5];

                for (int p = 0; p < pepSequences.Count; p++)
                {
                    ChemicalFormula      cf          = new Proteomics.AminoAcidPolymer.Peptide(pepSequences[p]).GetChemicalFormula();
                    IsotopicDistribution dist        = IsotopicDistribution.GetDistribution(cf, 0.125, 1e-8);
                    double[]             mz          = dist.Masses.Select(v => v.ToMz(1)).ToArray();
                    double[]             intensities = dist.Intensities.Select(v => v * intensity).ToArray();
                    double rt;
                    if (f == 0)
                    {
                        rt = file1Rt[p];
                    }
                    else
                    {
                        rt = file2Rt[p];
                    }

                    // add the scan
                    scans[p] = new MsDataScan(massSpectrum: new MzSpectrum(mz, intensities, false), oneBasedScanNumber: p + 1, msnOrder: 1, isCentroid: true,
                                              polarity: Polarity.Positive, retentionTime: rt, scanWindowRange: new MzRange(400, 1600), scanFilter: "f",
                                              mzAnalyzer: MZAnalyzerType.Orbitrap, totalIonCurrent: intensities.Sum(), injectionTime: 1.0, noiseData: null, nativeId: "scan=" + (p + 1));
                }

                // write the .mzML
                IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(new FakeMsDataFile(scans),
                                                                              Path.Combine(TestContext.CurrentContext.TestDirectory, filesToWrite[f] + ".mzML"), false);
            }

            // set up spectra file info
            SpectraFileInfo file1 = new SpectraFileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, filesToWrite[0] + ".mzML"), "a", 0, 0, 0);
            SpectraFileInfo file2 = new SpectraFileInfo(Path.Combine(TestContext.CurrentContext.TestDirectory, filesToWrite[1] + ".mzML"), "a", 1, 0, 0);

            // create some PSMs
            var pg = new ProteinGroup("MyProtein", "gene", "org");
            var myMbrProteinGroup = new ProteinGroup("MyMbrProtein", "MbrGene", "org");

            Identification id1 = new Identification(file1, "PEPTIDE", "PEPTIDE",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDE").MonoisotopicMass, file1Rt[0] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            Identification id2 = new Identification(file1, "PEPTIDEV", "PEPTIDEV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEV").MonoisotopicMass, file1Rt[1] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            Identification id3 = new Identification(file1, "PEPTIDEVV", "PEPTIDEVV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEVV").MonoisotopicMass, file1Rt[2] + 0.001, 1, new List <ProteinGroup> {
                myMbrProteinGroup
            });
            Identification id4 = new Identification(file1, "PEPTIDEVVV", "PEPTIDEVVV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEVVV").MonoisotopicMass, file1Rt[3] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            Identification id5 = new Identification(file1, "PEPTIDEVVVV", "PEPTIDEVVVV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEVVVV").MonoisotopicMass, file1Rt[4] + 0.001, 1, new List <ProteinGroup> {
                pg
            });

            Identification id6 = new Identification(file2, "PEPTIDE", "PEPTIDE",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDE").MonoisotopicMass, file2Rt[0] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            Identification id7 = new Identification(file2, "PEPTIDEV", "PEPTIDEV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEV").MonoisotopicMass, file2Rt[1] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            // missing ID 8 - MBR feature
            Identification id9 = new Identification(file2, "PEPTIDEVVV", "PEPTIDEVVV",
                                                    new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEVVV").MonoisotopicMass, file2Rt[3] + 0.001, 1, new List <ProteinGroup> {
                pg
            });
            Identification id10 = new Identification(file2, "PEPTIDEVVVV", "PEPTIDEVVVV",
                                                     new Proteomics.AminoAcidPolymer.Peptide("PEPTIDEVVVV").MonoisotopicMass, file2Rt[4] + 0.001, 1, new List <ProteinGroup> {
                pg
            });

            // test with top3 protein quant engine
            FlashLFQEngine engine = new FlashLFQEngine(new List <Identification> {
                id1, id2, id3, id4, id5, id6, id7, id9, id10
            }, matchBetweenRuns: true);
            var results = engine.Run();

            Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file1) > 0);
            Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file2) == 0);

            // test with advanced protein quant engine
            engine = new FlashLFQEngine(new List <Identification> {
                id1, id2, id3, id4, id5, id6, id7, id9, id10
            }, matchBetweenRuns: true, advancedProteinQuant: true);
            results = engine.Run();

            Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file1) > 0);
            Assert.That(results.ProteinGroups["MyMbrProtein"].GetIntensity(file2) == 0);
        }
Beispiel #27
0
        public static void CatchProbStuff()
        {
            ChemicalFormula formula = (ChemicalFormula.ParseFormula("C50O50"));

            IsotopicDistribution.GetDistribution(formula, 0.001, 1e-50, 1e-15);
        }