Exemplo n.º 1
0
        /// <summary>
        /// Round-trips a data file whose only scan has an empty spectrum: writes it to mzML,
        /// loads the written file back, and writes the loaded data to a second mzML file.
        /// The test contains no assertions; it passes when none of these steps throws.
        /// </summary>
        public static void WriteEmptyScan()
        {
            string testDirectory = TestContext.CurrentContext.TestDirectory;
            string firstPath     = Path.Combine(testDirectory, "mzmlWithEmptyScan.mzML");
            string secondPath    = Path.Combine(testDirectory, "mzmlWithEmptyScan2.mzML");

            // Spectrum with zero m/z values and zero intensities.
            MzmlMzSpectrum emptySpectrum = new MzmlMzSpectrum(new double[0], new double[0], false);

            MzmlScan emptyScan = new MzmlScan(1, emptySpectrum, 1, true, Polarity.Positive, 1, new MzRange(1, 100), "f", MZAnalyzerType.Orbitrap, emptySpectrum.SumOfAllY, null, "1");

            FakeMsDataFile fakeFile = new FakeMsDataFile(new IMzmlScan[] { emptyScan });

            // First pass: in-memory data -> mzML on disk.
            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(fakeFile, firstPath, false);

            // Second pass: mzML on disk -> memory -> a second mzML on disk.
            Mzml reloaded = Mzml.LoadAllStaticData(firstPath);

            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(reloaded, secondPath, false);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Tests peak filtering for ThermoRawFileReader.
        /// Loads the raw file with filtering parameters, checks that every scan has at most
        /// 200 peaks, writes the data to mzML, reloads it with the same filtering parameters,
        /// and compares rounded m/z values and rounded intensities peak by peak.
        /// </summary>
        /// <param name="infile">
        /// Name of the input file, resolved under the "DataFiles" folder of the test directory.
        /// The mzML output is written next to it, using the same base name.
        /// </param>
        public static void TestPeakFilteringRawFileReader(string infile)
        {
            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();
            var filterParams = new FilteringParams(200, 0.01, 0, 1, false, true, true);

            var path = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", infile);

            var a        = ThermoRawFileReaderData.LoadAllStaticData(path, filterParams, maxThreads: 1);
            var rawScans = a.GetAllScansList();

            foreach (var scan in rawScans)
            {
                Assert.That(scan.MassSpectrum.XArray.Length <= 200);
            }

            string outfile1 = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", Path.GetFileNameWithoutExtension(infile) + ".mzML");

            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(a, outfile1, false);
            var mzml = Mzml.LoadAllStaticData(outfile1, filterParams, maxThreads: 1);

            var mzmlScans = mzml.GetAllScansList();

            for (int i = 0; i < mzmlScans.Count; i++)
            {
                var mzmlScan = mzmlScans[i];
                var rawScan  = rawScans[i];

                for (int j = 0; j < mzmlScan.MassSpectrum.XArray.Length; j++)
                {
                    // m/z values are compared after rounding to two decimals.
                    double roundedMzmlMz = Math.Round(mzmlScan.MassSpectrum.XArray[j], 2);
                    double roundedRawMz  = Math.Round(rawScan.MassSpectrum.XArray[j], 2);

                    Assert.AreEqual(roundedMzmlMz, roundedRawMz);

                    // Intensities are read from YArray. The previous version read XArray here,
                    // which repeated the m/z comparison and never checked the intensities.
                    double roundedMzmlIntensity = Math.Round(mzmlScan.MassSpectrum.YArray[j], 0);
                    double roundedRawIntensity  = Math.Round(rawScan.MassSpectrum.YArray[j], 0);

                    Assert.AreEqual(roundedMzmlIntensity, roundedRawIntensity);
                }
            }

            // Read the elapsed time once so hours, minutes and seconds come from the same instant.
            var elapsed = stopwatch.Elapsed;

            Console.WriteLine($"Analysis time for TestPeakFilteringRawFileReader: {elapsed.Hours}h " +
                              $"{elapsed.Minutes}m {elapsed.Seconds}s");
        }
Exemplo n.º 3
0
        /// <summary>
        /// Builds a two-scan in-memory data file (one plain scan, one scan with a precursor)
        /// from a modified peptide, writes it to "argh.mzML", loads it back, and checks that
        /// the closest-spectrum lookups, the first m/z of scan 1 and the isolation width of
        /// scan 2 match what was written. Finally replaces every m/z of the reloaded second
        /// spectrum with 44 and checks the last m/z.
        /// </summary>
        public void WriteMzmlTest()
        {
            // Peptide carrying a carbamidomethylation on C.
            var testPeptide = new Peptide("GPEAPPPALPAGAPPPCTAVTSDHLNSLLGNILR");
            var carbamidomethylation = new OldSchoolChemicalFormulaModification(
                ChemicalFormula.ParseFormula("H3C2NO"),
                "carbamidomethylation of C",
                ModificationSites.C);

            testPeptide.AddModification(carbamidomethylation);

            MzmlMzSpectrum precursorSpectrum = CreateSpectrum(testPeptide.GetChemicalFormula(), 300, 2000, 1);
            MzmlMzSpectrum fragmentSpectrum  = CreateMS2spectrum(testPeptide.Fragment(FragmentTypes.b | FragmentTypes.y, true), 100, 1500);

            IMzmlScan[] scanList = new IMzmlScan[2];

            scanList[0] = new MzmlScan(1, precursorSpectrum, 1, true, Polarity.Positive, 1.0, new MzRange(300, 2000), " first spectrum", MZAnalyzerType.Unknown, precursorSpectrum.SumOfAllY, 1, "scan=1");
            scanList[1] = new MzmlScanWithPrecursor(2, fragmentSpectrum, 2, true, Polarity.Positive, 2.0, new MzRange(100, 1500), " second spectrum", MZAnalyzerType.Unknown, fragmentSpectrum.SumOfAllY, 1134.26091302033, 3, 0.141146966879759, 1134.3, 1, DissociationType.Unknown, 1, 1134.26091302033, 1, "scan=2");

            var inMemoryFile = new FakeMsDataFile(scanList);

            // Values captured before the round trip.
            var firstMzBefore = inMemoryFile.GetOneBasedScan(1).MassSpectrum.FirstX;
            var ms2Before     = inMemoryFile.GetOneBasedScan(2) as IMsDataScanWithPrecursor<MzmlMzSpectrum>;

            Assert.AreEqual(1, ms2Before.IsolationRange.Maximum - ms2Before.IsolationRange.Minimum);

            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(inMemoryFile, "argh.mzML", false);

            Mzml reloaded = Mzml.LoadAllStaticData(@"argh.mzML");

            // Result intentionally unused; the call itself is kept from the original test.
            reloaded.GetOneBasedScan(2);

            Assert.AreEqual(1, reloaded.GetClosestOneBasedSpectrumNumber(1));
            Assert.AreEqual(2, reloaded.GetClosestOneBasedSpectrumNumber(2));

            var firstMzAfter = reloaded.GetOneBasedScan(1).MassSpectrum.FirstX;

            Assert.AreEqual(firstMzBefore.Value, firstMzAfter.Value, 1e-9);

            var ms2After = reloaded.GetOneBasedScan(2) as IMsDataScanWithPrecursor<MzmlMzSpectrum>;

            Assert.AreEqual(1, ms2After.IsolationRange.Maximum - ms2After.IsolationRange.Minimum);

            // Map every x value to 44, then verify the last x value reflects it.
            ms2After.MassSpectrum.ReplaceXbyApplyingFunction(x => 44);
            Assert.AreEqual(44, ms2After.MassSpectrum.LastX);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Opens a Thermo raw file and checks spectrum numbering, the metadata and peaks of
        /// scan 53, software/instrument identification, a selected-ion intensity guess and
        /// MSX precursor lookups. Ends by writing the data as an indexed mzML file next to
        /// the raw file.
        /// </summary>
        public void LoadThermoTest()
        {
            ThermoRawFile rawFile = new ThermoRawFile(@"Shew_246a_LCQa_15Oct04_Andro_0904-2_4-20.RAW");

            // Open is called twice and LastSpectrumNumber is asserted twice, as in the original.
            // NOTE(review): presumably this checks that repeated calls are harmless - confirm.
            rawFile.Open();
            rawFile.Open();
            Assert.AreEqual(1, rawFile.FirstSpectrumNumber);
            Assert.AreEqual(3316, rawFile.LastSpectrumNumber);
            Assert.AreEqual(3316, rawFile.LastSpectrumNumber);

            // Metadata of scan 53.
            var scan53 = rawFile.GetScan(53);

            Assert.AreEqual(1.2623333333333333, scan53.RetentionTime);
            Assert.AreEqual(1, scan53.MsnOrder);
            Assert.AreEqual("controllerType=0 controllerNumber=1 scan=53", scan53.id);
            Assert.AreEqual("+ c ESI Full ms [400.00-2000.00]", scan53.ScanFilter);

            // Peak data of scan 53 (the scan is fetched a second time, as before).
            var spectrum53 = rawFile.GetScan(53).MassSpectrum;
            var basePeak   = spectrum53.PeakWithHighestY;

            Assert.AreEqual(75501, basePeak.Intensity);

            Assert.AreEqual(1, spectrum53.newSpectrumFilterByY(7.5e4).Count);
            Assert.AreEqual(2, spectrum53.newSpectrumExtract(new DoubleRange(923, 928)).Count);

            Assert.AreEqual(double.NaN, spectrum53.GetSignalToNoise(1));

            Assert.AreEqual("1.3", rawFile.GetSofwareVersion());

            // Selected-ion intensity guess for scan 948; the boolean result is ignored.
            double selectedIonIntensity;

            rawFile.GetScan(948).TryGetSelectedIonGuessIntensity(out selectedIonIntensity);
            Assert.AreEqual(4125760, selectedIonIntensity);

            Assert.AreEqual("LCQ", rawFile.GetInstrumentName());
            Assert.AreEqual("LCQ", rawFile.GetInstrumentModel());

            Assert.AreEqual(0, rawFile.GetMSXPrecursors(1289).Count);
            Assert.AreEqual(1, rawFile.GetMSXPrecursors(1290).Count);
            Assert.AreEqual(1194.53, rawFile.GetMSXPrecursors(1290).First());

            // Output path: same directory and base name as the raw file, with ".mzML" appended.
            string outputDirectory = Path.GetDirectoryName(rawFile.FilePath);
            string outputBaseName  = Path.GetFileNameWithoutExtension(rawFile.FilePath);

            MzmlMethods.CreateAndWriteMyIndexedMZmlwithCalibratedSpectra(rawFile, Path.Combine(outputDirectory, outputBaseName) + ".mzML");
        }
Exemplo n.º 5
0
        /// <summary>
        /// Tests LoadAllStaticData for ThermoRawFileReader.
        /// Loads the input file, writes it to mzML, reloads that mzML, writes it a second
        /// time (with the last writer argument set to true), reloads the second file, and
        /// prints the elapsed time. There are no assertions; the test passes when nothing throws.
        /// </summary>
        /// <param name="infile">Input file name under the "DataFiles" folder of the test directory.</param>
        /// <param name="outfile1">File name of the first mzML output, placed in the same folder.</param>
        /// <param name="outfile2">File name of the second mzML output, placed in the same folder.</param>
        public static void TestLoadAllStaticDataRawFileReader(string infile, string outfile1, string outfile2)
        {
            string dataFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles");

            string inputPath        = Path.Combine(dataFolder, infile);
            string firstOutputPath  = Path.Combine(dataFolder, outfile1);
            string secondOutputPath = Path.Combine(dataFolder, outfile2);

            Stopwatch timer = Stopwatch.StartNew();

            var rawData = ThermoRawFileReader.LoadAllStaticData(inputPath, maxThreads: 1);

            // raw -> mzML -> memory
            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(rawData, firstOutputPath, false);
            var reloadedMzml = Mzml.LoadAllStaticData(firstOutputPath);

            // memory -> second mzML -> memory
            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(reloadedMzml, secondOutputPath, true);
            Mzml.LoadAllStaticData(secondOutputPath);

            var elapsed = timer.Elapsed;

            Console.WriteLine($"Analysis time for TestLoadAllStaticDataRawFileReader({infile}): {elapsed.Hours}h {elapsed.Minutes}m {elapsed.Seconds}s");
        }
Exemplo n.º 6
0
        /// <summary>
        /// Writes four single-peak scans (alternating plain scans and scans with a precursor)
        /// to "what.mzML" in the test directory, reloads the file, and checks that the last
        /// scan with a precursor reports a one-based precursor scan number of 3.
        /// </summary>
        public static void AnotherMzMLtest()
        {
            IMzmlScan[] scans = new IMzmlScan[4];

            double[]       intensities1 = new double[] { 1 };
            double[]       mz1          = new double[] { 50 };
            MzmlMzSpectrum massSpec1    = new MzmlMzSpectrum(mz1, intensities1, false);

            scans[0] = new MzmlScan(1, massSpec1, 1, true, Polarity.Positive, 1, new MzRange(1, 100), "f", MZAnalyzerType.Orbitrap, massSpec1.SumOfAllY, null, "1");

            double[]       intensities2 = new double[] { 1 };
            double[]       mz2          = new double[] { 30 };
            MzmlMzSpectrum massSpec2    = new MzmlMzSpectrum(mz2, intensities2, false);

            scans[1] = new MzmlScanWithPrecursor(2, massSpec2, 2, true, Polarity.Positive, 2, new MzRange(1, 100), "f", MZAnalyzerType.Orbitrap, massSpec2.SumOfAllY,
                                                 50, null, null, 50, 1, DissociationType.CID, 1, null, null, "2");

            double[]       intensities3 = new double[] { 1 };
            double[]       mz3          = new double[] { 50 };
            MzmlMzSpectrum massSpec3    = new MzmlMzSpectrum(mz3, intensities3, false);

            // Each scan now takes the intensity sum from its own spectrum. The previous version
            // read massSpec1 here; the value is the same, but the reference was a copy-paste slip.
            scans[2] = new MzmlScan(3, massSpec3, 1, true, Polarity.Positive, 1, new MzRange(1, 100), "f", MZAnalyzerType.Orbitrap, massSpec3.SumOfAllY, null, "3");

            double[]       intensities4 = new double[] { 1 };
            double[]       mz4          = new double[] { 30 };
            MzmlMzSpectrum massSpec4    = new MzmlMzSpectrum(mz4, intensities4, false);

            // Likewise uses massSpec4 rather than massSpec2.
            // NOTE(review): the 3 after DissociationType.CID looks like the precursor scan number
            // asserted at the end of this test - confirm against the constructor signature.
            scans[3] = new MzmlScanWithPrecursor(4, massSpec4, 2, true, Polarity.Positive, 2, new MzRange(1, 100), "f", MZAnalyzerType.Orbitrap, massSpec4.SumOfAllY,
                                                 50, null, null, 50, 1, DissociationType.CID, 3, null, null, "4");

            FakeMsDataFile f = new FakeMsDataFile(scans);

            string mzmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "what.mzML");

            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(f, mzmlPath, false);

            Mzml ok = Mzml.LoadAllStaticData(mzmlPath);

            // The predicate already guarantees the type, so a direct cast replaces the 'as' cast.
            var scanWithPrecursor = (IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>)ok.Last(b => b is IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>);

            Assert.AreEqual(3, scanWithPrecursor.OneBasedPrecursorScanNumber);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Loads "tester.mzML", checks the spectrum size of four scans, then writes the data
        /// to a new mzML file, reloads that file, and writes it out once more.
        /// </summary>
        public void LoadMzmlFromConvertedMGFTest()
        {
            Mzml original = Mzml.LoadAllStaticData(@"tester.mzML");

            // Each entry is { one-based scan number, expected spectrum size }.
            int[][] expectedSizes =
            {
                new[] { 1, 192 },
                new[] { 3, 165 },
                new[] { 5, 551 },
                new[] { 975, 190 }
            };

            foreach (int[] entry in expectedSizes)
            {
                var spectrum = original.GetOneBasedScan(entry[0]).MassSpectrum;

                Assert.AreEqual(entry[1], spectrum.Size);
            }

            // Write, reload, and write again.
            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(original, "CreateFileFromConvertedMGF.mzML", false);

            Mzml reloaded = Mzml.LoadAllStaticData(@"CreateFileFromConvertedMGF.mzML");

            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(reloaded, "CreateFileFromConvertedMGF2.mzML", false);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Loads a Thermo raw file as static data and checks the spectrum count, several
        /// properties of scan 1, closest-spectrum lookup and deconvolution results. Writes
        /// the data to "convertedThermo.mzML" in the test directory, opens the same raw file
        /// through a dynamic connection for one lookup, and finally verifies that the
        /// polarity of the first scan is the same in the converted mzML.
        /// </summary>
        public static void LoadThermoTest2()
        {
            const string rawFileName = @"05-13-16_cali_MS_60K-res_MS.raw";

            ThermoStaticData staticData = ThermoStaticData.LoadAllStaticData(rawFileName);

            Assert.AreEqual(360, staticData.NumSpectra);
            Assert.GreaterOrEqual(1000, staticData.GetOneBasedScan(1).MassSpectrum.Extract(0, 500).Last().Mz);
            Assert.AreEqual(2, staticData.GetOneBasedScan(1).MassSpectrum.FilterByY(5e6, double.MaxValue).Count());

            // Result unused; the call is kept from the original test.
            var copiedArray = staticData.GetOneBasedScan(1).MassSpectrum.CopyTo2DArray();

            Assert.AreEqual(77561752, staticData.GetOneBasedScan(1).TotalIonCurrent);
            Assert.AreEqual(144, staticData.GetClosestOneBasedSpectrumNumber(2));

            // Result unused; the constructor call on scan 51's arrays is kept from the original test.
            MzSpectrum rebuiltSpectrum = new MzSpectrum(
                staticData.GetOneBasedScan(51).MassSpectrum.XArray,
                staticData.GetOneBasedScan(51).MassSpectrum.YArray,
                true);

            Assert.AreEqual(1120, staticData.GetOneBasedScan(1).MassSpectrum.Size);

            // Deconvolution of scan 1 over the full m/z range.
            var scanOneEnvelopes = staticData.GetOneBasedScan(1).MassSpectrum
                                             .Deconvolute(new MzRange(double.MinValue, double.MaxValue), 1, 10, 1, 4)
                                             .ToList();

            Assert.IsTrue(scanOneEnvelopes.Any(envelope => Math.Abs(envelope.peaks.First().mz.ToMass(envelope.charge) - 523.257) < 0.001));

            string convertedPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "convertedThermo.mzML");

            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(staticData, convertedPath, false);

            // File-level deconvolution; the feature with the most peaks is inspected.
            var largestFeature = staticData.Deconvolute(null, null, 1, 30, 10, 3, 10, scan => true)
                                           .OrderByDescending(feature => feature.NumPeaks)
                                           .First();

            Assert.IsTrue(Math.Abs(262.64 - largestFeature.Mass.ToMz(2)) <= 0.01);

            using (ThermoDynamicData dynamicData = ThermoDynamicData.InitiateDynamicConnection(rawFileName))
            {
                Assert.AreEqual(136, dynamicData.GetClosestOneBasedSpectrumNumber(1.89));
                dynamicData.ClearCachedScans();
            }

            Mzml convertedFile = Mzml.LoadAllStaticData(convertedPath);

            Assert.AreEqual(staticData.GetAllScansList().First().Polarity, convertedFile.GetAllScansList().First().Polarity);
        }
Exemplo n.º 9
0
        /// <summary>
        /// For each of three Thermo raw files in turn: loads it, writes it to "a.mzML",
        /// reloads that file, writes the reloaded data to "aa.mzML" (with the last writer
        /// argument set to true), and loads "aa.mzML". The same two output names are reused
        /// for every input. There are no assertions; the test passes when nothing throws.
        /// </summary>
        public static void ReadWriteReadEtc()
        {
            string[] rawFileNames =
            {
                @"testFileWMS2.raw",
                @"small.raw",
                @"05-13-16_cali_MS_60K-res_MS.raw"
            };

            foreach (string rawFileName in rawFileNames)
            {
                ThermoStaticData rawData = ThermoStaticData.LoadAllStaticData(rawFileName);

                MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(rawData, "a.mzML", false);

                var firstRoundTrip = Mzml.LoadAllStaticData("a.mzML");

                MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(firstRoundTrip, "aa.mzML", true);

                Mzml.LoadAllStaticData("aa.mzML");
            }
        }
Exemplo n.º 10
0
        public static void TestIndividualFileOutput()
        {
            string subFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"IndividualOutputTest");

            Directory.CreateDirectory(subFolder);
            string     outputFolder   = Path.Combine(subFolder, "Results");
            SearchTask allowFilesTask = new SearchTask();

            allowFilesTask.SearchParameters.WriteIndividualFiles    = true;
            allowFilesTask.SearchParameters.CompressIndividualFiles = false;

            SearchTask compressFilesTask = new SearchTask();

            compressFilesTask.SearchParameters.WriteIndividualFiles    = true;
            compressFilesTask.SearchParameters.CompressIndividualFiles = true;

            SearchTask noFilesTask = new SearchTask();

            noFilesTask.SearchParameters.WriteIndividualFiles = false;

            PeptideWithSetModifications       pwsm  = new PeptideWithSetModifications("AAFNSGK", null);
            List <(string, MetaMorpheusTask)> tasks = new List <(string, MetaMorpheusTask)> {
                ("allowFiles", allowFilesTask), ("compressFiles", compressFilesTask), ("noFiles", noFilesTask)
            };
            DbForTask db = new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", "gapdh.fasta"), false);

            TestDataFile datafile = new TestDataFile(pwsm);
            string       pathOne  = Path.Combine(subFolder, "fileOne.mzml");
            string       pathTwo  = Path.Combine(subFolder, "fileTwo.mzml");

            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(datafile, pathOne, false);
            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(datafile, pathTwo, false);

            new EverythingRunnerEngine(tasks, new List <string> {
                pathOne, pathTwo
            }, new List <DbForTask> {
                db
            }, outputFolder).Run();

            //check that the first task wrote everything fine
            HashSet <string> expectedFiles = new HashSet <string>
            {
                ".mzID",
                "_Peptides.psmtsv",
                "_ProteinGroups.tsv",
                "_PSMs.psmtsv",
                "_PSMsFormattedForPercolator.tab",
                "_QuantifiedPeaks.tsv"
            };
            HashSet <string> writtenFiles = new HashSet <string>(Directory.GetFiles(Path.Combine(outputFolder, "allowFiles", "Individual File Results")).Select(v => Path.GetFileName(v).Substring(7)));

            //check they're the same
            Assert.IsTrue(expectedFiles.Except(writtenFiles).Count() == 0);

            //check the second one is compressed and contains all the information
            writtenFiles = new HashSet <string>(Directory.GetFiles(Path.Combine(outputFolder, "compressFiles")).Select(v => Path.GetFileName(v)));
            //check the zip exists
            Assert.IsTrue(writtenFiles.Contains("Individual File Results.zip"));
            //check the original folder does not exist
            string[] subfolders = Directory.GetDirectories(Path.Combine(outputFolder, "compressFiles"));
            Assert.IsTrue(subfolders.Length == 0);
            ZipFile.ExtractToDirectory(Path.Combine(outputFolder, "compressFiles", "Individual File Results.zip"), Path.Combine(outputFolder, "compressFiles", "Individual File Results"));
            //read the extracted files
            writtenFiles = new HashSet <string>(Directory.GetFiles(Path.Combine(outputFolder, "compressFiles", "Individual File Results")).Select(v => Path.GetFileName(v).Substring(7)));
            //check they're the same
            Assert.IsTrue(expectedFiles.Except(writtenFiles).Count() == 0);

            //check the last one to make sure nothing was written except for the mzID files
            writtenFiles = new HashSet <string>(Directory.GetFiles(Path.Combine(outputFolder, "noFiles", "Individual File Results")).Select(v => Path.GetFileName(v).Substring(7)));
            Assert.IsTrue(writtenFiles.Count == 1);
            Assert.IsTrue(writtenFiles.Contains(".mzID"));

            Directory.Delete(outputFolder, true);

            //Do a check that we don't crash if there's only one file but somebody tries to zip the individual file results
            SearchTask weirdTask = new SearchTask();

            weirdTask.SearchParameters.CompressIndividualFiles = true;
            weirdTask.SearchParameters.WriteMzId = false;
            new EverythingRunnerEngine(new List <(string, MetaMorpheusTask)> {
                ("weird", weirdTask)
            }, new List <string> {
Exemplo n.º 11
0
        /// <summary>
        /// Runs top-down calibration for one raw file using the supplied top-down hits.
        /// Depending on the flags on <c>Sweet.lollipop</c>, it fits a mass-error model and
        /// applies it through <c>CalibrateHitsAndComponents</c> (optionally writing a
        /// calibrated mzML file), and/or fits a retention-time-error model and shifts the
        /// <c>rt_apex</c> of the calibration components that belong to this file.
        /// </summary>
        /// <param name="raw_file">
        /// The spectra file to calibrate. A ".raw" extension (any casing) is loaded with
        /// ThermoStaticData; anything else is loaded as mzML.
        /// </param>
        /// <param name="topdown_hits">
        /// Candidate hits. Only hits with score &gt; 0 are kept, and their m/z is recomputed
        /// from reported mass and charge.
        /// </param>
        /// <returns>
        /// <c>false</c> when fewer than 5 hits have score &gt;= 40, when the data file cannot
        /// be loaded, when fewer than 10 MS1 data points are acquired, or when retention-time
        /// calibration is enabled and has fewer than 10 data points; otherwise <c>true</c>.
        /// Note that the retention-time check runs after any mass calibration has been applied.
        /// </returns>
        public bool Run_TdMzCal(InputFile raw_file, List<SpectrumMatch> topdown_hits)
        {
            all_topdown_hits = topdown_hits.Where(h => h.score > 0).ToList();
            //need to reset m/z in case same td hits used for multiple calibration raw files...
            Parallel.ForEach(all_topdown_hits, h => h.mz = h.reported_mass.ToMz(h.charge));

            high_scoring_topdown_hits = all_topdown_hits.Where(h => h.score >= 40).ToList();
            this.raw_file             = raw_file;

            if (high_scoring_topdown_hits.Count < 5)
            {
                return false;
            }

            // Compare the extension case-insensitively: the previous '== ".raw"' sent files
            // named "*.RAW" to the mzML loader.
            bool isThermoRaw = string.Equals(Path.GetExtension(raw_file.complete_path), ".raw", System.StringComparison.OrdinalIgnoreCase);

            myMsDataFile = isThermoRaw ?
                           ThermoStaticData.LoadAllStaticData(raw_file.complete_path) :
                           null;
            if (myMsDataFile == null)
            {
                myMsDataFile = Mzml.LoadAllStaticData(raw_file.complete_path);
            }
            if (myMsDataFile == null)
            {
                return false;
            }

            DataPointAquisitionResults dataPointAcquisitionResult = GetDataPoints();

            if (dataPointAcquisitionResult.Ms1List.Count < 10)
            {
                return false;
            }

            if (Sweet.lollipop.mass_calibration)
            {
                var myMs1DataPoints = new List<(double[] xValues, double yValue)>();

                for (int i = 0; i < dataPointAcquisitionResult.Ms1List.Count; i++)
                {
                    var dataPoint = dataPointAcquisitionResult.Ms1List[i];

                    // x values: m/z, retention time, log total ion current, log injection time
                    var explanatoryVariables = new double[4];
                    explanatoryVariables[0] = dataPoint.mz;
                    explanatoryVariables[1] = dataPoint.retentionTime;
                    explanatoryVariables[2] = dataPoint.logTotalIonCurrent;
                    explanatoryVariables[3] = dataPoint.logInjectionTime;

                    // y value: the mass error the model is trained to predict
                    double mzError = dataPoint.massError;

                    myMs1DataPoints.Add((explanatoryVariables, mzError));
                }

                var ms1Model = GetRandomForestModel(myMs1DataPoints);

                CalibrateHitsAndComponents(ms1Model);
                if (Sweet.lollipop.calibrate_raw_files)
                {
                    // Path.Combine replaces the hard-coded "\\" separator.
                    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile,
                                                                          Path.Combine(raw_file.directory, raw_file.filename + "_calibrated.mzML"), false);
                }
            }

            if (Sweet.lollipop.retention_time_calibration)
            {
                var myMs1DataPoints = new List<(double[] xValues, double yValue)>();

                // For every distinct pfr_accession, keep the high-scoring hit with the
                // smallest ms2_retention_time.
                List<SpectrumMatch> firstElutingTopDownHit = new List<SpectrumMatch>();
                List<string>        PFRs = high_scoring_topdown_hits.Select(h => h.pfr_accession).Distinct().ToList();
                foreach (var PFR in PFRs)
                {
                    var firstHitWithPFR = high_scoring_topdown_hits
                                          .Where(h => h.pfr_accession == PFR).OrderBy(h => h.ms2_retention_time).First();
                    firstElutingTopDownHit.Add(firstHitWithPFR);
                }

                for (int i = 0; i < dataPointAcquisitionResult.Ms1List.Count; i++)
                {
                    var dataPoint = dataPointAcquisitionResult.Ms1List[i];

                    // Only data points identified by one of the first-eluting hits are used.
                    if (firstElutingTopDownHit.Contains(dataPoint.identification))
                    {
                        // x value: retention time
                        var explanatoryVariables = new double[1];
                        explanatoryVariables[0] = dataPoint.retentionTime;

                        // y value: the retention-time error the model is trained to predict
                        double RTError = dataPoint.RTError;

                        myMs1DataPoints.Add((explanatoryVariables, RTError));
                    }
                }

                if (myMs1DataPoints.Count < 10)
                {
                    return false;
                }

                var ms1Model = GetRandomForestModel(myMs1DataPoints);

                // Shift rt_apex of every calibration component whose input file matches this
                // file's condition, biological replicate, fraction and technical replicate.
                foreach (Component c in Sweet.lollipop.calibration_components.Where(h => h.input_file.lt_condition == raw_file.lt_condition && h.input_file.biological_replicate == raw_file.biological_replicate && h.input_file.fraction == raw_file.fraction && h.input_file.technical_replicate == raw_file.technical_replicate))
                {
                    c.rt_apex = c.rt_apex - ms1Model.Predict(new double[] { c.rt_apex });
                }
            }
            return true;
        }
Exemplo n.º 12
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following calibration settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the calibration task
            Status("Calibrating...", new List <string> {
                taskId
            });
            MyTaskResults = new MyTaskResults(this)
            {
                NewSpectra           = new List <string>(),
                NewFileSpecificTomls = new List <string>()
            };

            var           myFileManager = new MyFileManager(true);
            List <string> spectraFilesAfterCalibration = new List <string>();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                bool couldNotFindEnoughDatapoints = false;

                // get filename stuff
                var    originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
                var    originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
                string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

                // mark the file as in-progress
                StartingDataFile(originalUncalibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                // load the file
                Status("Loading spectra file...", new List <string> {
                    taskId, "Individual Spectra Files"
                });

                var myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters);

                // get datapoints to fit calibration function to
                Status("Acquiring calibration data points...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                DataPointAquisitionResults acquisitionResults = null;

                for (int i = 1; i <= 5; i++)
                {
                    acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                    // enough data points to calibrate?
                    if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count > NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count > NumRequiredMs2Datapoints)
                    {
                        break;
                    }

                    if (i == 1) // failed round 1
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(20);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(50);
                    }
                    else if (i == 2) // failed round 2
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(30);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(100);
                    }
                    else if (i == 3) // failed round 3
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(40);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(150);
                    }
                    else // failed round 4
                    {
                        if (acquisitionResults.Psms.Count < NumRequiredPsms)
                        {
                            Warn("Calibration failure! Could not find enough high-quality PSMs. Required " + NumRequiredPsms + ", saw " + acquisitionResults.Psms.Count);
                        }
                        if (acquisitionResults.Ms1List.Count < NumRequiredMs1Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS1 datapoints. Required " + NumRequiredMs1Datapoints + ", saw " + acquisitionResults.Ms1List.Count);
                        }
                        if (acquisitionResults.Ms2List.Count < NumRequiredMs2Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS2 datapoints. Required " + NumRequiredMs2Datapoints + ", saw " + acquisitionResults.Ms2List.Count);
                        }

                        couldNotFindEnoughDatapoints = true;
                        FinishedDataFile(originalUncalibratedFilePath, new List <string> {
                            taskId, "Individual Spectra Files", originalUncalibratedFilePath
                        });
                        break;
                    }

                    Warn("Could not find enough PSMs to calibrate with; opening up tolerances to " +
                         Math.Round(CommonParameters.PrecursorMassTolerance.Value, 2) + " ppm precursor and " +
                         Math.Round(CommonParameters.ProductMassTolerance.Value, 2) + " ppm product");
                }

                if (couldNotFindEnoughDatapoints)
                {
                    spectraFilesAfterCalibration.Add(Path.GetFileNameWithoutExtension(currentRawFileList[spectraFileIndex]));
                    ReportProgress(new ProgressEventArgs(100, "Failed to calibrate!", new List <string> {
                        taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                    }));
                    continue;
                }

                // stats before calibration
                int    prevPsmCount = acquisitionResults.Psms.Count;
                double preCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double preCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // generate calibration function and shift data points
                Status("Calibrating...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                CalibrationEngine engine = new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, FileSpecificParameters, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                engine.Run();

                //update file
                myMsDataFile = engine.CalibratedDataFile;

                // do another search to evaluate calibration results
                Status("Post-calibration search...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                //generate calibration function and shift data points AGAIN because it's fast and contributes new data
                Status("Calibrating...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                engine = new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, FileSpecificParameters, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                engine.Run();

                //update file
                myMsDataFile = engine.CalibratedDataFile;

                // write the calibrated mzML file
                MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);
                myFileManager.DoneWithFile(originalUncalibratedFilePath);

                // stats after calibration
                int    postCalibrationPsmCount          = acquisitionResults.Psms.Count;
                double postCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double postCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // did the data improve? (not used for anything yet...)
                bool improvement = ImprovGlobal(preCalibrationPrecursorErrorIqr, preCalibrationProductErrorIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorErrorIqr, postCalibrationProductErrorIqr);

                // write toml settings for the calibrated file
                var newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

                var fileSpecificParams = new FileSpecificParameters();

                // carry over file-specific parameters from the uncalibrated file to the calibrated one
                if (fileSettingsList[spectraFileIndex] != null)
                {
                    fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
                }

                //suggest 4 * interquartile range as the ppm tolerance
                fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * postCalibrationPrecursorErrorIqr) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
                fileSpecificParams.ProductMassTolerance   = new PpmTolerance((4.0 * postCalibrationProductErrorIqr) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));

                Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);

                FinishedWritingFile(newTomlFileName, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });

                // finished calibrating this file
                spectraFilesAfterCalibration.Add(Path.GetFileNameWithoutExtension(calibratedFilePath));
                FinishedWritingFile(calibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                MyTaskResults.NewSpectra.Add(calibratedFilePath);
                MyTaskResults.NewFileSpecificTomls.Add(newTomlFileName);
                FinishedDataFile(originalUncalibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }));
            }

            // re-write experimental design (if it has been defined) with new calibrated file names
            string assumedPathToExperDesign = Directory.GetParent(currentRawFileList.First()).FullName;

            assumedPathToExperDesign = Path.Combine(assumedPathToExperDesign, GlobalVariables.ExperimentalDesignFileName);

            if (File.Exists(assumedPathToExperDesign))
            {
                WriteNewExperimentalDesignFile(assumedPathToExperDesign, OutputFolder, spectraFilesAfterCalibration);
            }

            // finished calibrating all files for the task
            ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            return(MyTaskResults);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Runs the calibration task over every spectra file in <paramref name="currentRawFileList"/>:
        /// loads the file, searches it to acquire calibration data points, runs the
        /// <c>CalibrationEngine</c>, searches again to measure the post-calibration errors, and writes
        /// a calibrated .mzML plus a file-specific .toml with suggested tolerances into
        /// <paramref name="OutputFolder"/>. Finally, an experimental design file found next to the
        /// first spectra file is re-written with the calibrated file names.
        /// </summary>
        /// <param name="OutputFolder">Folder that receives the calibrated .mzML, .toml and experimental design files.</param>
        /// <param name="dbFilenameList">Protein databases passed to <c>LoadProteins</c>.</param>
        /// <param name="currentRawFileList">Paths of the spectra files to calibrate.</param>
        /// <param name="taskId">Identifier used as the first element of every status/progress nesting list.</param>
        /// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>; entries may be null.</param>
        /// <returns>
        /// The task results holding the paths of the new spectra files and file-specific tomls.
        /// Returned early (without processing the remaining files) if a file cannot be calibrated.
        /// </returns>
        protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            // load modifications
            Status("Loading modifications...", new List<string> {
                taskId
            });
            List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List<ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List<string> localizeableModificationTypes       = GlobalVariables.AllModTypesKnown.ToList();

            // load proteins
            List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following calibration settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the calibration task
            Status("Calibrating...", new List<string> {
                taskId
            });
            MyTaskResults = new MyTaskResults(this)
            {
                NewSpectra           = new List<string>(),
                NewFileSpecificTomls = new List<string>()
            };

            object lock1 = new object();

            var myFileManager = new MyFileManager(true);

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                // get filename stuff
                var    originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
                var    originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
                string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

                // mark the file as in-progress
                StartingDataFile(originalUncalibratedFilePath, new List<string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                MsDataFile myMsDataFile;

                // load the file
                Status("Loading spectra file...", new List<string> {
                    taskId, "Individual Spectra Files"
                });
                lock (lock1)
                {
                    myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters.TopNpeaks, CommonParameters.MinRatio, CommonParameters.TrimMs1Peaks, CommonParameters.TrimMsMsPeaks, CommonParameters);
                }

                // get datapoints to fit calibration function to
                Status("Acquiring calibration data points...", new List<string> {
                    taskId, "Individual Spectra Files"
                });
                DataPointAquisitionResults acquisitionResults = null;

                // Up to four search rounds; each failed round widens the tolerances before retrying.
                // The loop bound is 5, but round 4 always exits (break on success, return on failure).
                for (int i = 1; i <= 5; i++)
                {
                    acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                    // enough data points to calibrate?
                    // NOTE(review): MS1/MS2 counts must be strictly greater than the requirement here, while the
                    // failure warnings below use "<"; a count exactly equal to the requirement fails without a
                    // specific warning - confirm which comparison is intended.
                    if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count > NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count > NumRequiredMs2Datapoints)
                    {
                        break;
                    }

                    // NOTE(review): the tolerances are widened on the task-level CommonParameters, but the search
                    // above is given combinedParams' tolerances. Whether the widening reaches the next round depends
                    // on SetAllFileSpecificCommonParams returning the same instance - confirm. The widened values
                    // also remain set on CommonParameters for the files processed afterwards.
                    if (i == 1) // failed round 1
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(20);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(50);
                    }
                    else if (i == 2) // failed round 2
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(30);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(100);
                    }
                    else if (i == 3) // failed round 3
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(40);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(150);
                    }
                    else // failed round 4
                    {
                        if (acquisitionResults.Psms.Count < NumRequiredPsms)
                        {
                            Warn("Calibration failure! Could not find enough high-quality PSMs. Required " + NumRequiredPsms + ", saw " + acquisitionResults.Psms.Count);
                        }
                        if (acquisitionResults.Ms1List.Count < NumRequiredMs1Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS1 datapoints. Required " + NumRequiredMs1Datapoints + ", saw " + acquisitionResults.Ms1List.Count);
                        }
                        if (acquisitionResults.Ms2List.Count < NumRequiredMs2Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS2 datapoints. Required " + NumRequiredMs2Datapoints + ", saw " + acquisitionResults.Ms2List.Count);
                        }
                        FinishedDataFile(originalUncalibratedFilePath, new List<string> {
                            taskId, "Individual Spectra Files", originalUncalibratedFilePath
                        });

                        // NOTE(review): this ends the whole task, so any remaining spectra files are not calibrated
                        // and the experimental design is not re-written - confirm this is intended.
                        return(MyTaskResults);
                    }

                    Warn("Could not find enough PSMs to calibrate with; opening up tolerances to " +
                         Math.Round(CommonParameters.PrecursorMassTolerance.Value, 2) + " ppm precursor and " +
                         Math.Round(CommonParameters.ProductMassTolerance.Value, 2) + " ppm product");
                }

                // stats before calibration
                int    prevPsmCount = acquisitionResults.Psms.Count;
                double preCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double preCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // generate calibration function and shift data points
                Status("Calibrating...", new List<string> {
                    taskId, "Individual Spectra Files"
                });
                new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, new List<string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }).Run();

                // do another search to evaluate calibration results
                Status("Post-calibration search...", new List<string> {
                    taskId, "Individual Spectra Files"
                });
                acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                // stats after calibration
                int    postCalibrationPsmCount          = acquisitionResults.Psms.Count;
                double postCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double postCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // did the data improve? (not used for anything yet...)
                bool improvement = ImprovGlobal(preCalibrationPrecursorErrorIqr, preCalibrationProductErrorIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorErrorIqr, postCalibrationProductErrorIqr);

                // write toml settings for the calibrated file
                var newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

                var fileSpecificParams = new FileSpecificParameters();

                // carry over file-specific parameters from the uncalibrated file to the calibrated one
                if (fileSettingsList[spectraFileIndex] != null)
                {
                    fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
                }

                // don't write over ppm tolerances if they've been specified by the user already in the file-specific settings
                // otherwise, suggest 4 * interquartile range (plus the absolute median error) as the ppm tolerance
                if (fileSpecificParams.PrecursorMassTolerance == null)
                {
                    fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * postCalibrationPrecursorErrorIqr) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
                }
                if (fileSpecificParams.ProductMassTolerance == null)
                {
                    fileSpecificParams.ProductMassTolerance = new PpmTolerance((4.0 * postCalibrationProductErrorIqr) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));
                }

                Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);

                FinishedWritingFile(newTomlFileName, new List<string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });

                // write the calibrated mzML file
                MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);
                myFileManager.DoneWithFile(originalUncalibratedFilePath);

                // finished calibrating this file
                FinishedWritingFile(calibratedFilePath, new List<string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                MyTaskResults.NewSpectra.Add(calibratedFilePath);
                MyTaskResults.NewFileSpecificTomls.Add(newTomlFileName);
                FinishedDataFile(originalUncalibratedFilePath, new List<string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }));
            }

            // re-write experimental design (if it has been defined) with new calibrated file names.
            // Skipped when there are no spectra files, because First() would throw on an empty list.
            if (currentRawFileList.Count > 0)
            {
                string assumedPathToExperDesign = Directory.GetParent(currentRawFileList.First()).FullName;

                assumedPathToExperDesign = Path.Combine(assumedPathToExperDesign, GlobalVariables.ExperimentalDesignFileName);

                // Only write the new file when a source design exists; writing unconditionally would
                // leave an empty experimental design file in the output folder.
                if (File.Exists(assumedPathToExperDesign))
                {
                    var lines = File.ReadAllLines(assumedPathToExperDesign);
                    List<string> newExperimentalDesignOutput = new List<string>(lines.Length);

                    for (int i = 0; i < lines.Length; i++)
                    {
                        // header of experimental design file
                        if (i == 0)
                        {
                            newExperimentalDesignOutput.Add(lines[i]);
                        }
                        else
                        {
                            // first column is the file name: strip its extension and append the calibration suffix;
                            // the remaining columns are copied through, each followed by a tab
                            var    split   = lines[i].Split('\t');
                            string newline = Path.GetFileNameWithoutExtension(split[0]) + CalibSuffix + "\t";
                            for (int j = 1; j < split.Length; j++)
                            {
                                newline += split[j] + "\t";
                            }

                            newExperimentalDesignOutput.Add(newline);
                        }
                    }

                    File.WriteAllLines(Path.Combine(OutputFolder, GlobalVariables.ExperimentalDesignFileName), newExperimentalDesignOutput);
                }
            }

            // finished calibrating all files for the task
            ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> {
                taskId, "Individual Spectra Files"
            }));

            return(MyTaskResults);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Benchmark entry point: writes and reads back a generated toml file, then reads and
        /// re-writes an mzML file, printing the elapsed milliseconds of each step.
        /// </summary>
        /// <param name="args">
        /// Four positional arguments:
        /// [0] path of the toml file to write and read back,
        /// [1] number of key-value pairs to generate (integer),
        /// [2] path of the mzML file to read,
        /// [3] path of the mzML file to write.
        /// </param>
        private static void Main(string[] args)
        {
            Console.WriteLine("Welcome to MetaMorpheus");

            // All four arguments are indexed below; fail with a usage message instead of an
            // IndexOutOfRangeException / FormatException when they are missing or malformed.
            if (args == null || args.Length < 4 || !int.TryParse(args[1], out int numKeyValuePairs))
            {
                Console.Error.WriteLine("Usage: <tomlFilePath> <numKeyValuePairs> <mzmlInputPath> <mzmlOutputPath>");
                Environment.ExitCode = 1;
                return;
            }

            // EDGAR: Creating the FlashLfqEngine is unfortunately required,
            // otherwise the code just crashes when executed.

            SpectraFileInfo mzml = new SpectraFileInfo("sliced-mzml.mzml", "a", 0, 1, 0);
            var             pg   = new FlashLFQ.ProteinGroup("MyProtein", "gene", "org");
            Identification  id3  = new Identification(mzml, "EGFQVADGPLYR", "EGFQVADGPLYR",
                                                      1350.65681, 94.12193, 2, new List<FlashLFQ.ProteinGroup> {
                pg
            });
            Identification id4 = new Identification(mzml, "EGFQVADGPLYR", "EGFQVADGPLYR",
                                                    1350.65681, 94.05811, 2, new List<FlashLFQ.ProteinGroup> {
                pg
            });
            FlashLfqEngine engine = new FlashLfqEngine(new List<Identification> {
                id3, id4
            }, normalize: true);

            // EDGAR: End of part required to avoid crash


            //generate toml
            Console.WriteLine("generating toml with {0} key-value pairs", args[1]);
            var tomlData = Toml.Create();

            // the pair count is parsed once above rather than on every loop iteration
            for (int i = 0; i < numKeyValuePairs; i++)
            {
                tomlData.Add(i.ToString(), i);
            }

            //write toml
            Console.WriteLine("writing toml file {0}", args[0]);
            Stopwatch stopwatch = Stopwatch.StartNew();

            Toml.WriteFile(tomlData, args[0]);
            stopwatch.Stop();
            Console.WriteLine("Time elapsed for toml write: {0}\n", stopwatch.ElapsedMilliseconds);


            //read file
            Console.WriteLine("reading toml file {0}", args[0]);
            stopwatch = Stopwatch.StartNew();
            var tomlRead = Toml.ReadFile(args[0]);

            stopwatch.Stop();
            Console.WriteLine("Time elapsed for toml read: {0}\n", stopwatch.ElapsedMilliseconds);


            //read mzml file
            Console.WriteLine("reading mzml file {0}", args[2]);
            stopwatch = Stopwatch.StartNew();
            var msData = Mzml.LoadAllStaticData(args[2]);

            stopwatch.Stop();
            Console.WriteLine("Time elapsed for mzML read: {0}\n", stopwatch.ElapsedMilliseconds);


            //write mzml file
            Console.WriteLine("writing mzml file {0}", args[3]);
            stopwatch = Stopwatch.StartNew();
            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(msData, args[3], false);
            stopwatch.Stop();
            Console.WriteLine("Time elapsed for mzML write: {0}", stopwatch.ElapsedMilliseconds);
        }
Exemplo n.º 15
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
        {
            myTaskResults = new MyTaskResults(this)
            {
                newSpectra = new List <string>()
            };

            Status("Loading modifications...", new List <string> {
                taskId
            });
            List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List <string> localizeableModificationTypes       = CommonParameters.LocalizeAll ? GlobalVariables.AllModTypesKnown.ToList() : CommonParameters.ListOfModTypesLocalize.ToList();

            Status("Loading proteins...", new List <string> {
                taskId
            });
            var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, UsefulProteomicsDatabases.DecoyType.Reverse, localizeableModificationTypes, b.IsContaminant, out Dictionary <string, Modification> um)).ToList();

            proseCreatedWhileRunning.Append("The following calibration settings were used: ");
            proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            if (CommonParameters.DigestionParams.MaxPeptideLength == null)
            {
                proseCreatedWhileRunning.Append("maximum peptide length = unspecified; ");
            }
            else
            {
                proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            }
            proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");

            proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
            proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + " Da. ");
            proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. ");

            object          lock1           = new object();
            ParallelOptions parallelOptions = new ParallelOptions();

            if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
            {
                parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
            }
            var myFileManager = new MyFileManager(true);

            Status("Calibrating...", new List <string> {
                taskId
            });
            Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
            {
                var originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
                var originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);

                ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + "-calib.mzml");

                IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile;

                Status("Loading spectra file...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                // only load one file at a time
                lock (lock1)
                {
                    myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters.TopNpeaks, CommonParameters.MinRatio, CommonParameters.TrimMs1Peaks, CommonParameters.TrimMsMsPeaks);
                }

                Status("Acquiring calibration data points...", new List <string> {
                    taskId, "Individual Spectra Files"
                });

                // get datapoints to fit calibration function to
                var acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                // stats before calibration
                int prevPsmCount = acquisitionResults.Item1.Count;

                var preCalibrationPrecursorErrors = acquisitionResults.Item1.Select(p => (p.ScanPrecursorMass - p.PeptideMonisotopicMass.Value) / p.PeptideMonisotopicMass.Value * 1e6).ToList();
                double preCalibrationPrecursorIqr = Statistics.InterquartileRange(preCalibrationPrecursorErrors);

                var preCalibrationProductErrors = acquisitionResults.Item1.SelectMany(p => p.ProductMassErrorPpm.SelectMany(v => v.Value)).ToList();
                double preCalibrationProductIqr = Statistics.InterquartileRange(preCalibrationProductErrors);

                // enough data points to calibrate with?
                if (acquisitionResults.Item2 == null)
                {
                    Warn("Could not find any datapoints to calibrate with!");
                    return;
                }
                if (acquisitionResults.Item2.Ms1List.Count < 4 || acquisitionResults.Item2.Ms2List.Count < 4)
                {
                    Warn("Could not find enough MS1 datapoints to calibrate (" + acquisitionResults.Item2.Ms1List.Count + " found)");
                    Warn("Could not find enough MS2 datapoints to calibrate (" + acquisitionResults.Item2.Ms2List.Count + " found)");
                    return;
                }

                // generate calibration function and shift data points
                Status("Calibrating...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                new CalibrationEngine(myMsDataFile, acquisitionResults.Item2, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }).Run();
                myFileManager.DoneWithFile(originalUncalibratedFilePath);

                // do another search to evaluate calibration results
                Status("Post-calibration search...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                // stats after calibration
                int postCalibrationPsmCount = acquisitionResults.Item1.Count;

                var postCalibrationPrecursorErrors = acquisitionResults.Item1.Select(p => (p.ScanPrecursorMass - p.PeptideMonisotopicMass) / p.PeptideMonisotopicMass * 1e6).ToList();
                double postCalibrationPrecursorIqr = Statistics.InterquartileRange(postCalibrationPrecursorErrors);

                var postCalibrationProductErrors = acquisitionResults.Item1.SelectMany(p => p.ProductMassErrorPpm.SelectMany(v => v.Value)).ToList();
                double postCalibrationProductIqr = Statistics.InterquartileRange(postCalibrationProductErrors);

                // did the data improve? (not used for anything yet...)
                bool improvement = ImprovGlobal(preCalibrationPrecursorIqr, preCalibrationProductIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorIqr, postCalibrationProductIqr);

                // write suggested tolerances for this file
                var tomlFileName         = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + "-calib.toml");
                FileSpecificTolerances f = new FileSpecificTolerances
                {
                    PrecursorMassTolerance = new PpmTolerance(4.0 * postCalibrationPrecursorIqr),
                    ProductMassTolerance   = new PpmTolerance(4.0 * postCalibrationProductIqr)
                };
                Toml.WriteFile(f, tomlFileName, tomlConfig);
                SucessfullyFinishedWritingFile(tomlFileName, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });

                // write the calibrated MZML file
                MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);

                // all done
                SucessfullyFinishedWritingFile(calibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                myTaskResults.newSpectra.Add(calibratedFilePath);
                ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }));
            });
            ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            return(myTaskResults);
        }