/// <summary>
/// Writes a data file whose only scan has empty m/z and intensity arrays to mzML,
/// reads that file back, and writes the re-read data to a second mzML file.
/// </summary>
public static void WriteEmptyScan()
{
    // Both output files live in the NUnit test directory.
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string firstOutputPath = Path.Combine(testDirectory, "mzmlWithEmptyScan.mzML");
    string secondOutputPath = Path.Combine(testDirectory, "mzmlWithEmptyScan2.mzML");

    // A spectrum with no peaks at all.
    double[] emptyIntensities = new double[] { };
    double[] emptyMzValues = new double[] { };
    MzmlMzSpectrum emptySpectrum = new MzmlMzSpectrum(emptyMzValues, emptyIntensities, false);

    IMzmlScan[] scans = new IMzmlScan[]
    {
        new MzmlScan(1, emptySpectrum, 1, true, Polarity.Positive, 1, new MzRange(1, 100), "f", MZAnalyzerType.Orbitrap, emptySpectrum.SumOfAllY, null, "1")
    };
    FakeMsDataFile fakeFile = new FakeMsDataFile(scans);

    // write -> read -> write again
    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(fakeFile, firstOutputPath, false);
    Mzml reloaded = Mzml.LoadAllStaticData(firstOutputPath);
    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(reloaded, secondOutputPath, false);
}
/// <summary>
/// Tests peak filtering for ThermoRawFileReader.
/// Loads the raw file with filtering parameters, checks that every scan holds at most
/// 200 peaks, writes the data to mzML, reloads it with the same filter and checks that
/// the m/z values (rounded to 2 decimals) and the intensities (rounded to 0 decimals)
/// of the reloaded scans match the raw scans peak by peak.
/// </summary>
/// <param name="infile">File name of the raw file inside the test directory's "DataFiles" folder.</param>
public static void TestPeakFilteringRawFileReader(string infile)
{
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    var filterParams = new FilteringParams(200, 0.01, 0, 1, false, true, true);

    var path = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", infile);

    var a = ThermoRawFileReaderData.LoadAllStaticData(path, filterParams, maxThreads: 1);
    var rawScans = a.GetAllScansList();
    foreach (var scan in rawScans)
    {
        Assert.That(scan.MassSpectrum.XArray.Length <= 200);
    }

    string outfile1 = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", Path.GetFileNameWithoutExtension(infile) + ".mzML");
    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(a, outfile1, false);
    var mzml = Mzml.LoadAllStaticData(outfile1, filterParams, maxThreads: 1);

    var mzmlScans = mzml.GetAllScansList();
    for (int i = 0; i < mzmlScans.Count; i++)
    {
        var mzmlScan = mzmlScans[i];
        var rawScan = rawScans[i];

        for (int j = 0; j < mzmlScan.MassSpectrum.XArray.Length; j++)
        {
            double roundedMzmlMz = Math.Round(mzmlScan.MassSpectrum.XArray[j], 2);
            double roundedRawMz = Math.Round(rawScan.MassSpectrum.XArray[j], 2);

            Assert.AreEqual(roundedMzmlMz, roundedRawMz);

            // Intensities live in YArray; comparing XArray here would only
            // re-check the m/z values at a coarser rounding.
            double roundedMzmlIntensity = Math.Round(mzmlScan.MassSpectrum.YArray[j], 0);
            double roundedRawIntensity = Math.Round(rawScan.MassSpectrum.YArray[j], 0);

            Assert.AreEqual(roundedMzmlIntensity, roundedRawIntensity);
        }
    }

    Console.WriteLine($"Analysis time for TestPeakFilteringRawFileReader: {stopwatch.Elapsed.Hours}h " +
        $"{stopwatch.Elapsed.Minutes}m {stopwatch.Elapsed.Seconds}s");
}
/// <summary>
/// Builds a two-scan data file (one MS1 scan and one scan with a precursor) from a
/// carbamidomethylated peptide, writes it to "argh.mzML", reads it back and checks that
/// scan lookup, the first m/z value of scan 1 and the isolation width of scan 2 survive
/// the round trip. Finally checks that replacing every x value with 44 is reflected by LastX.
/// </summary>
public void WriteMzmlTest()
{
    const string outputFile = "argh.mzML";

    // peptide carrying a carbamidomethylation on C
    var peptide = new Peptide("GPEAPPPALPAGAPPPCTAVTSDHLNSLLGNILR");
    OldSchoolChemicalFormulaModification carbamidomethylation = new OldSchoolChemicalFormulaModification(
        ChemicalFormula.ParseFormula("H3C2NO"),
        "carbamidomethylation of C",
        ModificationSites.C);
    peptide.AddModification(carbamidomethylation);

    MzmlMzSpectrum ms1Spectrum = CreateSpectrum(peptide.GetChemicalFormula(), 300, 2000, 1);
    MzmlMzSpectrum ms2Spectrum = CreateMS2spectrum(peptide.Fragment(FragmentTypes.b | FragmentTypes.y, true), 100, 1500);

    IMzmlScan firstScan = new MzmlScan(1, ms1Spectrum, 1, true, Polarity.Positive, 1.0, new MzRange(300, 2000), " first spectrum", MZAnalyzerType.Unknown, ms1Spectrum.SumOfAllY, 1, "scan=1");
    IMzmlScan fragmentScan = new MzmlScanWithPrecursor(2, ms2Spectrum, 2, true, Polarity.Positive, 2.0, new MzRange(100, 1500), " second spectrum", MZAnalyzerType.Unknown, ms2Spectrum.SumOfAllY, 1134.26091302033, 3, 0.141146966879759, 1134.3, 1, DissociationType.Unknown, 1, 1134.26091302033, 1, "scan=2");
    var inMemoryFile = new FakeMsDataFile(new IMzmlScan[] { firstScan, fragmentScan });

    // values captured before writing
    var firstXBeforeWrite = inMemoryFile.GetOneBasedScan(1).MassSpectrum.FirstX;
    var precursorScanBeforeWrite = inMemoryFile.GetOneBasedScan(2) as IMsDataScanWithPrecursor<MzmlMzSpectrum>;
    Assert.AreEqual(1, precursorScanBeforeWrite.IsolationRange.Maximum - precursorScanBeforeWrite.IsolationRange.Minimum);

    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(inMemoryFile, outputFile, false);

    Mzml reloaded = Mzml.LoadAllStaticData(outputFile);
    reloaded.GetOneBasedScan(2);

    Assert.AreEqual(1, reloaded.GetClosestOneBasedSpectrumNumber(1));
    Assert.AreEqual(2, reloaded.GetClosestOneBasedSpectrumNumber(2));

    // first m/z of scan 1 must be unchanged within 1e-9
    var firstXAfterRead = reloaded.GetOneBasedScan(1).MassSpectrum.FirstX;
    Assert.AreEqual(firstXBeforeWrite.Value, firstXAfterRead.Value, 1e-9);

    var precursorScanAfterRead = reloaded.GetOneBasedScan(2) as IMsDataScanWithPrecursor<MzmlMzSpectrum>;
    Assert.AreEqual(1, precursorScanAfterRead.IsolationRange.Maximum - precursorScanAfterRead.IsolationRange.Minimum);

    precursorScanAfterRead.MassSpectrum.ReplaceXbyApplyingFunction((a) => 44);
    Assert.AreEqual(44, precursorScanAfterRead.MassSpectrum.LastX);
}
/// <summary>
/// Opens a Thermo LCQ raw file, checks spectrum numbering, the metadata and peaks of
/// scan 53, instrument information and MSX precursors, and finally writes the file
/// out as an indexed mzML next to the raw file.
/// </summary>
public void LoadThermoTest()
{
    ThermoRawFile rawFile = new ThermoRawFile(@"Shew_246a_LCQa_15Oct04_Andro_0904-2_4-20.RAW");

    // Open is invoked twice, exactly as in the original test.
    rawFile.Open();
    rawFile.Open();

    Assert.AreEqual(1, rawFile.FirstSpectrumNumber);
    Assert.AreEqual(3316, rawFile.LastSpectrumNumber);
    Assert.AreEqual(3316, rawFile.LastSpectrumNumber);

    // scan-level metadata of scan 53
    var scan53 = rawFile.GetScan(53);
    Assert.AreEqual(1.2623333333333333, scan53.RetentionTime);
    Assert.AreEqual(1, scan53.MsnOrder);
    Assert.AreEqual("controllerType=0 controllerNumber=1 scan=53", scan53.id);
    Assert.AreEqual("+ c ESI Full ms [400.00-2000.00]", scan53.ScanFilter);

    // spectrum-level checks on scan 53
    var spectrum53 = rawFile.GetScan(53).MassSpectrum;
    var basePeak = spectrum53.PeakWithHighestY;
    Assert.AreEqual(75501, basePeak.Intensity);
    Assert.AreEqual(1, spectrum53.newSpectrumFilterByY(7.5e4).Count);
    Assert.AreEqual(2, spectrum53.newSpectrumExtract(new DoubleRange(923, 928)).Count);
    Assert.AreEqual(double.NaN, spectrum53.GetSignalToNoise(1));

    Assert.AreEqual("1.3", rawFile.GetSofwareVersion());

    double selectedIonIntensity;
    rawFile.GetScan(948).TryGetSelectedIonGuessIntensity(out selectedIonIntensity);
    Assert.AreEqual(4125760, selectedIonIntensity);

    Assert.AreEqual("LCQ", rawFile.GetInstrumentName());
    Assert.AreEqual("LCQ", rawFile.GetInstrumentModel());

    Assert.AreEqual(0, rawFile.GetMSXPrecursors(1289).Count);
    Assert.AreEqual(1, rawFile.GetMSXPrecursors(1290).Count);
    Assert.AreEqual(1194.53, rawFile.GetMSXPrecursors(1290).First());

    // same directory and base name as the raw file, with an .mzML extension
    string rawDirectory = Path.GetDirectoryName(rawFile.FilePath);
    string rawBaseName = Path.GetFileNameWithoutExtension(rawFile.FilePath);
    string indexedMzmlPath = Path.Combine(rawDirectory, rawBaseName) + ".mzML";
    MzmlMethods.CreateAndWriteMyIndexedMZmlwithCalibratedSpectra(rawFile, indexedMzmlPath);
}
/// <summary>
/// Tests LoadAllStaticData for ThermoRawFileReader: loads the raw file, writes it to a
/// first mzML file, reloads that file, writes it to a second mzML file and loads the
/// second file as well. The elapsed time is printed to the console.
/// </summary>
/// <param name="infile">Raw file name inside the test directory's "DataFiles" folder.</param>
/// <param name="outfile1">File name of the first mzML file, written into the same folder.</param>
/// <param name="outfile2">File name of the second mzML file, written into the same folder.</param>
public static void TestLoadAllStaticDataRawFileReader(string infile, string outfile1, string outfile2)
{
    string dataFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles");
    string rawPath = Path.Combine(dataFolder, infile);
    string firstMzmlPath = Path.Combine(dataFolder, outfile1);
    string secondMzmlPath = Path.Combine(dataFolder, outfile2);

    Stopwatch timer = Stopwatch.StartNew();

    // raw -> mzML
    var rawData = ThermoRawFileReader.LoadAllStaticData(rawPath, maxThreads: 1);
    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(rawData, firstMzmlPath, false);

    // mzML -> mzML, then make sure the second file can be read too
    var reloaded = Mzml.LoadAllStaticData(firstMzmlPath);
    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(reloaded, secondMzmlPath, true);
    Mzml.LoadAllStaticData(secondMzmlPath);

    Console.WriteLine($"Analysis time for TestLoadAllStaticDataRawFileReader({infile}): {timer.Elapsed.Hours}h {timer.Elapsed.Minutes}m {timer.Elapsed.Seconds}s");
}
/// <summary>
/// Writes four single-peak scans (scans 2 and 4 carry a precursor) to "what.mzML" in the
/// test directory, reads the file back and checks that the last scan with a precursor
/// reports scan 3 as its one-based precursor scan number.
/// </summary>
public static void AnotherMzMLtest()
{
    IMzmlScan[] scans = new IMzmlScan[4];

    double[] intensities1 = new double[] { 1 };
    double[] mz1 = new double[] { 50 };
    MzmlMzSpectrum massSpec1 = new MzmlMzSpectrum(mz1, intensities1, false);
    scans[0] = new MzmlScan(1, massSpec1, 1, true, Polarity.Positive, 1, new MzRange(1, 100), "f", MZAnalyzerType.Orbitrap, massSpec1.SumOfAllY, null, "1");

    double[] intensities2 = new double[] { 1 };
    double[] mz2 = new double[] { 30 };
    MzmlMzSpectrum massSpec2 = new MzmlMzSpectrum(mz2, intensities2, false);
    scans[1] = new MzmlScanWithPrecursor(2, massSpec2, 2, true, Polarity.Positive, 2, new MzRange(1, 100), "f", MZAnalyzerType.Orbitrap, massSpec2.SumOfAllY, 50, null, null, 50, 1, DissociationType.CID, 1, null, null, "2");

    // Scans 3 and 4 take the total ion current from their own spectra rather than from
    // the spectra of scans 1 and 2 (the values are equal here, but each scan should be
    // self-consistent).
    double[] intensities3 = new double[] { 1 };
    double[] mz3 = new double[] { 50 };
    MzmlMzSpectrum massSpec3 = new MzmlMzSpectrum(mz3, intensities3, false);
    scans[2] = new MzmlScan(3, massSpec3, 1, true, Polarity.Positive, 1, new MzRange(1, 100), "f", MZAnalyzerType.Orbitrap, massSpec3.SumOfAllY, null, "3");

    double[] intensities4 = new double[] { 1 };
    double[] mz4 = new double[] { 30 };
    MzmlMzSpectrum massSpec4 = new MzmlMzSpectrum(mz4, intensities4, false);
    scans[3] = new MzmlScanWithPrecursor(4, massSpec4, 2, true, Polarity.Positive, 2, new MzRange(1, 100), "f", MZAnalyzerType.Orbitrap, massSpec4.SumOfAllY, 50, null, null, 50, 1, DissociationType.CID, 3, null, null, "4");

    FakeMsDataFile f = new FakeMsDataFile(scans);

    string outputPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "what.mzML");
    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(f, outputPath, false);

    Mzml ok = Mzml.LoadAllStaticData(outputPath);

    var scanWithPrecursor = ok.Last(b => b is IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>) as IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>;

    Assert.AreEqual(3, scanWithPrecursor.OneBasedPrecursorScanNumber);
}
/// <summary>
/// Loads "tester.mzML", checks the spectrum sizes of scans 1, 3, 5 and 975, then writes
/// the data to a new mzML file, reloads it and writes it once more.
/// </summary>
public void LoadMzmlFromConvertedMGFTest()
{
    Mzml source = Mzml.LoadAllStaticData(@"tester.mzML");

    // { one-based scan number, expected spectrum size }
    int[][] expectedSizes =
    {
        new[] { 1, 192 },
        new[] { 3, 165 },
        new[] { 5, 551 },
        new[] { 975, 190 },
    };
    foreach (int[] entry in expectedSizes)
    {
        var spectrum = source.GetOneBasedScan(entry[0]).MassSpectrum;
        Assert.AreEqual(entry[1], spectrum.Size);
    }

    // write -> read -> write again
    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(source, "CreateFileFromConvertedMGF.mzML", false);
    Mzml reloaded = Mzml.LoadAllStaticData(@"CreateFileFromConvertedMGF.mzML");
    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(reloaded, "CreateFileFromConvertedMGF2.mzML", false);
}
/// <summary>
/// Loads a Thermo raw file as static data and checks scan counts, peak extraction and
/// filtering, total ion current, deconvolution results and the dynamic connection's
/// retention-time lookup. The data is also written to "convertedThermo.mzML" in the test
/// directory and read back to compare the polarity of the first scan.
/// </summary>
public static void LoadThermoTest2()
{
    const string rawFileName = @"05-13-16_cali_MS_60K-res_MS.raw";
    string convertedPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "convertedThermo.mzML");

    ThermoStaticData staticData = ThermoStaticData.LoadAllStaticData(rawFileName);

    Assert.AreEqual(360, staticData.NumSpectra);

    // asserts 1000 >= m/z of the last peak extracted from [0, 500]
    Assert.GreaterOrEqual(1000, staticData.GetOneBasedScan(1).MassSpectrum.Extract(0, 500).Last().Mz);

    Assert.AreEqual(2, staticData.GetOneBasedScan(1).MassSpectrum.FilterByY(5e6, double.MaxValue).Count());

    // result is not inspected; the call itself is what the test exercises
    var copiedPeaks = staticData.GetOneBasedScan(1).MassSpectrum.CopyTo2DArray();

    Assert.AreEqual(77561752, staticData.GetOneBasedScan(1).TotalIonCurrent);
    Assert.AreEqual(144, staticData.GetClosestOneBasedSpectrumNumber(2));

    // constructing a spectrum from scan 51's arrays is likewise only exercised, not asserted on
    MzSpectrum rebuiltSpectrum = new MzSpectrum(
        staticData.GetOneBasedScan(51).MassSpectrum.XArray,
        staticData.GetOneBasedScan(51).MassSpectrum.YArray,
        true);

    Assert.AreEqual(1120, staticData.GetOneBasedScan(1).MassSpectrum.Size);

    // spectrum-level deconvolution of scan 1
    var scanDeconvolution = staticData.GetOneBasedScan(1).MassSpectrum
        .Deconvolute(new MzRange(double.MinValue, double.MaxValue), 1, 10, 1, 4)
        .ToList();
    Assert.IsTrue(scanDeconvolution.Any(b => Math.Abs(b.peaks.First().mz.ToMass(b.charge) - 523.257) < 0.001));

    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(staticData, convertedPath, false);

    // file-level deconvolution: take the feature with the most peaks
    var largestFeature = staticData.Deconvolute(null, null, 1, 30, 10, 3, 10, b => true)
        .OrderByDescending(b => b.NumPeaks)
        .First();
    Assert.IsTrue(Math.Abs(262.64 - largestFeature.Mass.ToMz(2)) <= 0.01);

    using (ThermoDynamicData dynamicThermo = ThermoDynamicData.InitiateDynamicConnection(rawFileName))
    {
        Assert.AreEqual(136, dynamicThermo.GetClosestOneBasedSpectrumNumber(1.89));
        dynamicThermo.ClearCachedScans();
    }

    Mzml convertedFile = Mzml.LoadAllStaticData(convertedPath);

    Assert.AreEqual(
        staticData.GetAllScansList().First().Polarity,
        convertedFile.GetAllScansList().First().Polarity);
}
/// <summary>
/// For each of three Thermo raw files: loads the raw file, writes it to "a.mzML",
/// reloads that file, writes it to "aa.mzML" and loads "aa.mzML" as well.
/// The same two output file names are reused for every raw file.
/// </summary>
public static void ReadWriteReadEtc()
{
    string[] rawFileNames =
    {
        @"testFileWMS2.raw",
        @"small.raw",
        @"05-13-16_cali_MS_60K-res_MS.raw",
    };

    foreach (string rawFileName in rawFileNames)
    {
        ThermoStaticData rawData = ThermoStaticData.LoadAllStaticData(rawFileName);
        MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(rawData, "a.mzML", false);

        var firstRoundTrip = Mzml.LoadAllStaticData("a.mzML");
        MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(firstRoundTrip, "aa.mzML", true);

        Mzml.LoadAllStaticData("aa.mzML");
    }
}
public static void TestIndividualFileOutput() { string subFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"IndividualOutputTest"); Directory.CreateDirectory(subFolder); string outputFolder = Path.Combine(subFolder, "Results"); SearchTask allowFilesTask = new SearchTask(); allowFilesTask.SearchParameters.WriteIndividualFiles = true; allowFilesTask.SearchParameters.CompressIndividualFiles = false; SearchTask compressFilesTask = new SearchTask(); compressFilesTask.SearchParameters.WriteIndividualFiles = true; compressFilesTask.SearchParameters.CompressIndividualFiles = true; SearchTask noFilesTask = new SearchTask(); noFilesTask.SearchParameters.WriteIndividualFiles = false; PeptideWithSetModifications pwsm = new PeptideWithSetModifications("AAFNSGK", null); List <(string, MetaMorpheusTask)> tasks = new List <(string, MetaMorpheusTask)> { ("allowFiles", allowFilesTask), ("compressFiles", compressFilesTask), ("noFiles", noFilesTask) }; DbForTask db = new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", "gapdh.fasta"), false); TestDataFile datafile = new TestDataFile(pwsm); string pathOne = Path.Combine(subFolder, "fileOne.mzml"); string pathTwo = Path.Combine(subFolder, "fileTwo.mzml"); MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(datafile, pathOne, false); MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(datafile, pathTwo, false); new EverythingRunnerEngine(tasks, new List <string> { pathOne, pathTwo }, new List <DbForTask> { db }, outputFolder).Run(); //check that the first task wrote everything fine HashSet <string> expectedFiles = new HashSet <string> { ".mzID", "_Peptides.psmtsv", "_ProteinGroups.tsv", "_PSMs.psmtsv", "_PSMsFormattedForPercolator.tab", "_QuantifiedPeaks.tsv" }; HashSet <string> writtenFiles = new HashSet <string>(Directory.GetFiles(Path.Combine(outputFolder, "allowFiles", "Individual File Results")).Select(v => Path.GetFileName(v).Substring(7))); //check they're the same 
Assert.IsTrue(expectedFiles.Except(writtenFiles).Count() == 0); //check the second one is compressed and contains all the information writtenFiles = new HashSet <string>(Directory.GetFiles(Path.Combine(outputFolder, "compressFiles")).Select(v => Path.GetFileName(v))); //check the zip exists Assert.IsTrue(writtenFiles.Contains("Individual File Results.zip")); //check the original folder does not exist string[] subfolders = Directory.GetDirectories(Path.Combine(outputFolder, "compressFiles")); Assert.IsTrue(subfolders.Length == 0); ZipFile.ExtractToDirectory(Path.Combine(outputFolder, "compressFiles", "Individual File Results.zip"), Path.Combine(outputFolder, "compressFiles", "Individual File Results")); //read the extracted files writtenFiles = new HashSet <string>(Directory.GetFiles(Path.Combine(outputFolder, "compressFiles", "Individual File Results")).Select(v => Path.GetFileName(v).Substring(7))); //check they're the same Assert.IsTrue(expectedFiles.Except(writtenFiles).Count() == 0); //check the last one to make sure nothing was written except for the mzID files writtenFiles = new HashSet <string>(Directory.GetFiles(Path.Combine(outputFolder, "noFiles", "Individual File Results")).Select(v => Path.GetFileName(v).Substring(7))); Assert.IsTrue(writtenFiles.Count == 1); Assert.IsTrue(writtenFiles.Contains(".mzID")); Directory.Delete(outputFolder, true); //Do a check that we don't crash if there's only one file but somebody tries to zip the individual file results SearchTask weirdTask = new SearchTask(); weirdTask.SearchParameters.CompressIndividualFiles = true; weirdTask.SearchParameters.WriteMzId = false; new EverythingRunnerEngine(new List <(string, MetaMorpheusTask)> { ("weird", weirdTask) }, new List <string> {
/// <summary>
/// Calibrates top-down hits and components against one raw file.
/// Hits with a positive score are kept and their m/z is recomputed from the reported mass;
/// hits scoring at least 40 are used for calibration. Depending on the
/// <c>Sweet.lollipop</c> settings, a random-forest model is fitted to the m/z errors
/// (mass calibration, optionally writing a calibrated mzML file) and/or to the retention-time
/// errors of the earliest-eluting hit per PFR accession (retention time calibration).
/// </summary>
/// <param name="raw_file">Input file to load (Thermo .raw or mzML) and calibrate against.</param>
/// <param name="topdown_hits">Candidate top-down hits.</param>
/// <returns>
/// <c>false</c> when there are fewer than 5 high-scoring hits, the data file could not be
/// loaded, fewer than 10 MS1 data points were acquired, or fewer than 10 retention-time
/// data points were available; otherwise <c>true</c>.
/// </returns>
public bool Run_TdMzCal(InputFile raw_file, List<SpectrumMatch> topdown_hits)
{
    all_topdown_hits = topdown_hits.Where(h => h.score > 0).ToList();

    //need to reset m/z in case same td hits used for multiple calibration raw files...
    Parallel.ForEach(all_topdown_hits, h => h.mz = h.reported_mass.ToMz(h.charge));

    high_scoring_topdown_hits = all_topdown_hits.Where(h => h.score >= 40).ToList();
    this.raw_file = raw_file;

    if (high_scoring_topdown_hits.Count < 5)
    {
        return false;
    }

    // Compare the extension case-insensitively: Thermo files are frequently named ".RAW",
    // and a case-sensitive check would hand those to the mzML loader.
    bool isThermoRawFile = string.Equals(Path.GetExtension(raw_file.complete_path), ".raw", System.StringComparison.OrdinalIgnoreCase);
    myMsDataFile = isThermoRawFile ? ThermoStaticData.LoadAllStaticData(raw_file.complete_path) : null;
    if (myMsDataFile == null)
    {
        myMsDataFile = Mzml.LoadAllStaticData(raw_file.complete_path);
    }
    if (myMsDataFile == null)
    {
        return false;
    }

    DataPointAquisitionResults dataPointAcquisitionResult = GetDataPoints();

    if (dataPointAcquisitionResult.Ms1List.Count < 10)
    {
        return false;
    }

    if (Sweet.lollipop.mass_calibration)
    {
        var myMs1DataPoints = new List<(double[] xValues, double yValue)>();

        for (int i = 0; i < dataPointAcquisitionResult.Ms1List.Count; i++)
        {
            //x values
            var explanatoryVariables = new double[4];
            explanatoryVariables[0] = dataPointAcquisitionResult.Ms1List[i].mz;
            explanatoryVariables[1] = dataPointAcquisitionResult.Ms1List[i].retentionTime;
            explanatoryVariables[2] = dataPointAcquisitionResult.Ms1List[i].logTotalIonCurrent;
            explanatoryVariables[3] = dataPointAcquisitionResult.Ms1List[i].logInjectionTime;

            //yvalue
            double mzError = dataPointAcquisitionResult.Ms1List[i].massError;

            myMs1DataPoints.Add((explanatoryVariables, mzError));
        }

        var ms1Model = GetRandomForestModel(myMs1DataPoints);

        CalibrateHitsAndComponents(ms1Model);
        if (Sweet.lollipop.calibrate_raw_files)
        {
            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, raw_file.directory + "\\" + raw_file.filename + "_calibrated.mzML", false);
        }
    }

    if (Sweet.lollipop.retention_time_calibration)
    {
        var myMs1DataPoints = new List<(double[] xValues, double yValue)>();

        // For every PFR accession keep only the hit with the earliest MS2 retention time.
        List<SpectrumMatch> firstElutingTopDownHit = new List<SpectrumMatch>();
        List<string> PFRs = high_scoring_topdown_hits.Select(h => h.pfr_accession).Distinct().ToList();
        foreach (var PFR in PFRs)
        {
            var firstHitWithPFR = high_scoring_topdown_hits
                .Where(h => h.pfr_accession == PFR).OrderBy(h => h.ms2_retention_time).First();
            firstElutingTopDownHit.Add(firstHitWithPFR);
        }

        for (int i = 0; i < dataPointAcquisitionResult.Ms1List.Count; i++)
        {
            if (firstElutingTopDownHit.Contains(dataPointAcquisitionResult.Ms1List[i].identification))
            {
                //x values
                var explanatoryVariables = new double[1];
                explanatoryVariables[0] = dataPointAcquisitionResult.Ms1List[i].retentionTime;

                //yvalue
                double RTError = dataPointAcquisitionResult.Ms1List[i].RTError;

                myMs1DataPoints.Add((explanatoryVariables, RTError));
            }
        }

        if (myMs1DataPoints.Count < 10)
        {
            return false;
        }

        var ms1Model = GetRandomForestModel(myMs1DataPoints);

        // Shift the apex retention time of every calibration component that belongs to
        // the same condition / biological replicate / fraction / technical replicate.
        foreach (Component c in Sweet.lollipop.calibration_components.Where(h => h.input_file.lt_condition == raw_file.lt_condition && h.input_file.biological_replicate == raw_file.biological_replicate && h.input_file.fraction == raw_file.fraction && h.input_file.technical_replicate == raw_file.technical_replicate))
        {
            c.rt_apex = c.rt_apex - ms1Model.Predict(new double[] { c.rt_apex });
        }
    }
    return true;
}
/// <summary>
/// Runs the calibration task: loads modifications and proteins, records the settings as
/// prose, and then, for every spectra file, acquires calibration data points (retrying with
/// wider tolerances when too few are found), calibrates twice, writes the calibrated mzML
/// file plus a file-specific toml with suggested tolerances, and finally rewrites the
/// experimental design file if one exists next to the first spectra file.
/// </summary>
/// <param name="OutputFolder">Folder that receives the calibrated mzML and toml files.</param>
/// <param name="dbFilenameList">Protein databases to load.</param>
/// <param name="currentRawFileList">Paths of the spectra files to calibrate.</param>
/// <param name="taskId">Identifier used in status and progress messages.</param>
/// <param name="fileSettingsList">File-specific parameters, indexed like <paramref name="currentRawFileList"/>; entries may be null.</param>
/// <returns>The task results listing the new spectra files and file-specific tomls.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

    // write prose settings
    ProseCreatedWhileRunning.Append("The following calibration settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // start the calibration task
    Status("Calibrating...", new List<string> { taskId });
    MyTaskResults = new MyTaskResults(this)
    {
        NewSpectra = new List<string>(),
        NewFileSpecificTomls = new List<string>()
    };

    var myFileManager = new MyFileManager(true);
    List<string> spectraFilesAfterCalibration = new List<string>();

    // Tolerances (in ppm) to fall back to after failed acquisition rounds 1, 2 and 3.
    int[] widenedPrecursorPpm = { 20, 30, 40 };
    int[] widenedProductPpm = { 50, 100, 150 };

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        if (GlobalVariables.StopLoops) { break; }

        bool couldNotFindEnoughDatapoints = false;

        // get filename stuff
        var originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
        var originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
        string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

        // mark the file as in-progress
        StartingDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });

        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        // load the file
        Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files" });

        var myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters);

        // get datapoints to fit calibration function to
        Status("Acquiring calibration data points...", new List<string> { taskId, "Individual Spectra Files" });
        DataPointAquisitionResults acquisitionResults = null;

        for (int i = 1; i <= 5; i++)
        {
            acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

            // enough data points to calibrate?
            if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count > NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count > NumRequiredMs2Datapoints)
            {
                break;
            }

            if (i > widenedPrecursorPpm.Length) // failed round 4: no wider tolerances left to try
            {
                if (acquisitionResults.Psms.Count < NumRequiredPsms)
                {
                    Warn("Calibration failure! Could not find enough high-quality PSMs. Required " + NumRequiredPsms + ", saw " + acquisitionResults.Psms.Count);
                }
                if (acquisitionResults.Ms1List.Count < NumRequiredMs1Datapoints)
                {
                    Warn("Calibration failure! Could not find enough MS1 datapoints. Required " + NumRequiredMs1Datapoints + ", saw " + acquisitionResults.Ms1List.Count);
                }
                if (acquisitionResults.Ms2List.Count < NumRequiredMs2Datapoints)
                {
                    Warn("Calibration failure! Could not find enough MS2 datapoints. Required " + NumRequiredMs2Datapoints + ", saw " + acquisitionResults.Ms2List.Count);
                }
                couldNotFindEnoughDatapoints = true;
                FinishedDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });
                break;
            }

            // failed round i (1..3): open up the tolerances for the next attempt.
            // The task-level parameters are updated as before.
            CommonParameters.PrecursorMassTolerance = new PpmTolerance(widenedPrecursorPpm[i - 1]);
            CommonParameters.ProductMassTolerance = new PpmTolerance(widenedProductPpm[i - 1]);

            // The search above reads its tolerances from combinedParams, so the widened values
            // must be applied there as well; otherwise the retry repeats the same search whenever
            // combinedParams is a separate object from CommonParameters.
            // NOTE(review): whether SetAllFileSpecificCommonParams returns a copy is not visible here - confirm.
            combinedParams.PrecursorMassTolerance = CommonParameters.PrecursorMassTolerance;
            combinedParams.ProductMassTolerance = CommonParameters.ProductMassTolerance;

            Warn("Could not find enough PSMs to calibrate with; opening up tolerances to " +
                Math.Round(combinedParams.PrecursorMassTolerance.Value, 2) + " ppm precursor and " +
                Math.Round(combinedParams.ProductMassTolerance.Value, 2) + " ppm product");
        }

        if (couldNotFindEnoughDatapoints)
        {
            // keep the uncalibrated file name in the experimental design
            spectraFilesAfterCalibration.Add(Path.GetFileNameWithoutExtension(currentRawFileList[spectraFileIndex]));
            ReportProgress(new ProgressEventArgs(100, "Failed to calibrate!", new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }));
            continue;
        }

        // stats before calibration
        int prevPsmCount = acquisitionResults.Psms.Count;
        double preCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
        double preCalibrationProductErrorIqr = acquisitionResults.PsmProductIqrPpmError;

        // generate calibration function and shift data points
        Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
        CalibrationEngine engine = new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, FileSpecificParameters, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
        engine.Run();

        //update file
        myMsDataFile = engine.CalibratedDataFile;

        // do another search to evaluate calibration results
        Status("Post-calibration search...", new List<string> { taskId, "Individual Spectra Files" });
        acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

        //generate calibration function and shift data points AGAIN because it's fast and contributes new data
        Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
        engine = new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, FileSpecificParameters, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
        engine.Run();

        //update file
        myMsDataFile = engine.CalibratedDataFile;

        // write the calibrated mzML file
        MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);
        myFileManager.DoneWithFile(originalUncalibratedFilePath);

        // stats after calibration
        int postCalibrationPsmCount = acquisitionResults.Psms.Count;
        double postCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
        double postCalibrationProductErrorIqr = acquisitionResults.PsmProductIqrPpmError;

        // did the data improve? (not used for anything yet...)
        bool improvement = ImprovGlobal(preCalibrationPrecursorErrorIqr, preCalibrationProductErrorIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorErrorIqr, postCalibrationProductErrorIqr);

        // write toml settings for the calibrated file
        var newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

        var fileSpecificParams = new FileSpecificParameters();

        // carry over file-specific parameters from the uncalibrated file to the calibrated one
        if (fileSettingsList[spectraFileIndex] != null)
        {
            fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
        }

        //suggest 4 * interquartile range as the ppm tolerance
        fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * postCalibrationPrecursorErrorIqr) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
        fileSpecificParams.ProductMassTolerance = new PpmTolerance((4.0 * postCalibrationProductErrorIqr) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));

        Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);

        FinishedWritingFile(newTomlFileName, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });

        // finished calibrating this file
        spectraFilesAfterCalibration.Add(Path.GetFileNameWithoutExtension(calibratedFilePath));
        FinishedWritingFile(calibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
        MyTaskResults.NewSpectra.Add(calibratedFilePath);
        MyTaskResults.NewFileSpecificTomls.Add(newTomlFileName);
        FinishedDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });
        ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }));
    }

    // re-write experimental design (if it has been defined) with new calibrated file names
    string assumedPathToExperDesign = Directory.GetParent(currentRawFileList.First()).FullName;
    assumedPathToExperDesign = Path.Combine(assumedPathToExperDesign, GlobalVariables.ExperimentalDesignFileName);

    if (File.Exists(assumedPathToExperDesign))
    {
        WriteNewExperimentalDesignFile(assumedPathToExperDesign, OutputFolder, spectraFilesAfterCalibration);
    }

    // finished calibrating all files for the task
    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    return MyTaskResults;
}
/// <summary>
/// Runs the calibration task over every spectra file in <paramref name="currentRawFileList"/>.
/// For each file it loads the spectra, searches for calibration data points (widening the mass
/// tolerances for up to three further rounds if too few are found), runs the
/// <c>CalibrationEngine</c>, re-searches to collect post-calibration statistics, and writes a
/// calibrated .mzML file plus a file-specific .toml with suggested tolerances.
/// Finally, if an experimental design file sits next to the first spectra file, a copy with the
/// calibrated file names is written to <paramref name="OutputFolder"/>.
/// </summary>
/// <param name="OutputFolder">Folder that receives the calibrated .mzML, the .toml and the rewritten experimental design.</param>
/// <param name="dbFilenameList">Protein databases handed to <c>LoadProteins</c>.</param>
/// <param name="currentRawFileList">Paths of the spectra files to calibrate.</param>
/// <param name="taskId">Task identifier; used as the first element of every nested status/progress id list.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>; entries may be null.</param>
/// <returns>
/// The task results, listing the calibrated spectra files and their .toml files. If one file cannot be
/// calibrated after the fourth search round the method returns early with the results gathered so far.
/// </returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    // load modifications
    Status("Loading modifications...", new List<string> { taskId });
    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
    List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
    List<string> localizeableModificationTypes = GlobalVariables.AllModTypesKnown.ToList();

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

    // write prose settings
    ProseCreatedWhileRunning.Append("The following calibration settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
    ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // start the calibration task
    Status("Calibrating...", new List<string> { taskId });
    MyTaskResults = new MyTaskResults(this)
    {
        NewSpectra = new List<string>(),
        NewFileSpecificTomls = new List<string>()
    };

    object lock1 = new object();
    var myFileManager = new MyFileManager(true);

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        // get filename stuff
        var originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
        var originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
        string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

        // mark the file as in-progress
        StartingDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });

        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        MsDataFile myMsDataFile;

        // load the file
        Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files" });
        lock (lock1)
        {
            myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters.TopNpeaks, CommonParameters.MinRatio, CommonParameters.TrimMs1Peaks, CommonParameters.TrimMsMsPeaks, CommonParameters);
        }

        // get datapoints to fit calibration function to
        Status("Acquiring calibration data points...", new List<string> { taskId, "Individual Spectra Files" });
        DataPointAquisitionResults acquisitionResults = null;

        // Rounds 1-3 widen the tolerances and retry; a failed round 4 gives up and returns,
        // so the loop body never runs a fifth time.
        for (int i = 1; i <= 5; i++)
        {
            // NOTE(review): the search reads the tolerances from combinedParams, while the widening below
            // writes to CommonParameters. If SetAllFileSpecificCommonParams returns a separate object the
            // retries would repeat the same search - confirm against that helper.
            acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

            // enough data points to calibrate?
            if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count > NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count > NumRequiredMs2Datapoints)
            {
                break;
            }

            if (i == 1) // failed round 1
            {
                CommonParameters.PrecursorMassTolerance = new PpmTolerance(20);
                CommonParameters.ProductMassTolerance = new PpmTolerance(50);
            }
            else if (i == 2) // failed round 2
            {
                CommonParameters.PrecursorMassTolerance = new PpmTolerance(30);
                CommonParameters.ProductMassTolerance = new PpmTolerance(100);
            }
            else if (i == 3) // failed round 3
            {
                CommonParameters.PrecursorMassTolerance = new PpmTolerance(40);
                CommonParameters.ProductMassTolerance = new PpmTolerance(150);
            }
            else // failed round 4
            {
                // NOTE(review): the acceptance test above uses '>' for MS1/MS2 while these warnings use '<',
                // so a count exactly equal to the requirement fails without a message - confirm intent.
                if (acquisitionResults.Psms.Count < NumRequiredPsms)
                {
                    Warn("Calibration failure! Could not find enough high-quality PSMs. Required " + NumRequiredPsms + ", saw " + acquisitionResults.Psms.Count);
                }
                if (acquisitionResults.Ms1List.Count < NumRequiredMs1Datapoints)
                {
                    Warn("Calibration failure! Could not find enough MS1 datapoints. Required " + NumRequiredMs1Datapoints + ", saw " + acquisitionResults.Ms1List.Count);
                }
                if (acquisitionResults.Ms2List.Count < NumRequiredMs2Datapoints)
                {
                    Warn("Calibration failure! Could not find enough MS2 datapoints. Required " + NumRequiredMs2Datapoints + ", saw " + acquisitionResults.Ms2List.Count);
                }

                FinishedDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });
                return (MyTaskResults);
            }

            Warn("Could not find enough PSMs to calibrate with; opening up tolerances to " +
                Math.Round(CommonParameters.PrecursorMassTolerance.Value, 2) + " ppm precursor and " +
                Math.Round(CommonParameters.ProductMassTolerance.Value, 2) + " ppm product");
        }

        // stats before calibration
        int prevPsmCount = acquisitionResults.Psms.Count;
        double preCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
        double preCalibrationProductErrorIqr = acquisitionResults.PsmProductIqrPpmError;

        // generate calibration function and shift data points
        Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
        new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }).Run();

        // do another search to evaluate calibration results
        Status("Post-calibration search...", new List<string> { taskId, "Individual Spectra Files" });
        acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

        // stats after calibration
        int postCalibrationPsmCount = acquisitionResults.Psms.Count;
        double postCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
        double postCalibrationProductErrorIqr = acquisitionResults.PsmProductIqrPpmError;

        // did the data improve? (not used for anything yet...)
        bool improvement = ImprovGlobal(preCalibrationPrecursorErrorIqr, preCalibrationProductErrorIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorErrorIqr, postCalibrationProductErrorIqr);

        // write toml settings for the calibrated file
        var newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

        var fileSpecificParams = new FileSpecificParameters();

        // carry over file-specific parameters from the uncalibrated file to the calibrated one
        if (fileSettingsList[spectraFileIndex] != null)
        {
            fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
        }

        // don't write over ppm tolerances if they've been specified by the user already in the file-specific settings
        // otherwise, suggest 4 * interquartile range (plus the absolute median error) as the ppm tolerance
        if (fileSpecificParams.PrecursorMassTolerance == null)
        {
            fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * postCalibrationPrecursorErrorIqr) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
        }
        if (fileSpecificParams.ProductMassTolerance == null)
        {
            fileSpecificParams.ProductMassTolerance = new PpmTolerance((4.0 * postCalibrationProductErrorIqr) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));
        }

        Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);

        FinishedWritingFile(newTomlFileName, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });

        // write the calibrated mzML file
        MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);
        myFileManager.DoneWithFile(originalUncalibratedFilePath);

        // finished calibrating this file
        FinishedWritingFile(calibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
        MyTaskResults.NewSpectra.Add(calibratedFilePath);
        MyTaskResults.NewFileSpecificTomls.Add(newTomlFileName);
        FinishedDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });
        ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }));
    }

    // re-write experimental design (if it has been defined) with new calibrated file names.
    // The design file is looked for next to the first spectra file; with no spectra files there is nothing to look for.
    string firstSpectraFilePath = currentRawFileList.FirstOrDefault();
    if (firstSpectraFilePath != null)
    {
        string assumedPathToExperDesign = Directory.GetParent(firstSpectraFilePath).FullName;
        assumedPathToExperDesign = Path.Combine(assumedPathToExperDesign, GlobalVariables.ExperimentalDesignFileName);

        // Only emit a design file when one was actually defined; writing unconditionally
        // would leave an empty design file in the output folder.
        if (File.Exists(assumedPathToExperDesign))
        {
            List<string> newExperimentalDesignOutput = new List<string>();
            var lines = File.ReadAllLines(assumedPathToExperDesign);

            for (int i = 0; i < lines.Length; i++)
            {
                // header of experimental design file is copied verbatim
                if (i == 0)
                {
                    newExperimentalDesignOutput.Add(lines[i]);
                }
                else
                {
                    // first column is the file name: swap in the calibrated name, keep the remaining columns
                    var split = lines[i].Split('\t');
                    string newline = Path.GetFileNameWithoutExtension(split[0]) + CalibSuffix + "\t";
                    for (int j = 1; j < split.Length; j++)
                    {
                        newline += split[j] + "\t";
                    }

                    newExperimentalDesignOutput.Add(newline);
                }
            }

            File.WriteAllLines(Path.Combine(OutputFolder, GlobalVariables.ExperimentalDesignFileName), newExperimentalDesignOutput);
        }
    }

    // finished calibrating all files for the task
    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    return (MyTaskResults);
}
/// <summary>
/// Benchmark entry point: times writing and reading a generated toml file and reading and
/// re-writing an mzML file, printing the elapsed milliseconds of each step to the console.
/// </summary>
/// <param name="args">
/// Four positional arguments:
/// <c>args[0]</c> path of the toml file to write and read back,
/// <c>args[1]</c> number of key-value pairs to generate,
/// <c>args[2]</c> path of the mzML file to read,
/// <c>args[3]</c> path of the mzML file to write.
/// </param>
private static void Main(string[] args)
{
    Console.WriteLine("Welcome to MetaMorpheus");

    // All four arguments are indexed below; report a usage error instead of an IndexOutOfRangeException.
    if (args == null || args.Length < 4)
    {
        Console.Error.WriteLine("Usage: <toml file> <number of key-value pairs> <input mzML file> <output mzML file>");
        Environment.ExitCode = 1;
        return;
    }

    // Parse the pair count once up front rather than on every loop iteration.
    if (!int.TryParse(args[1], out int keyValuePairCount))
    {
        Console.Error.WriteLine("The number of key-value pairs must be an integer, but was: " + args[1]);
        Environment.ExitCode = 1;
        return;
    }

    // EDGAR: Creating the FlashLfqEngine is unfortunately required,
    // otherwise the code just crashes when executed.
    SpectraFileInfo mzml = new SpectraFileInfo("sliced-mzml.mzml", "a", 0, 1, 0);
    var pg = new FlashLFQ.ProteinGroup("MyProtein", "gene", "org");
    Identification id3 = new Identification(mzml, "EGFQVADGPLYR", "EGFQVADGPLYR", 1350.65681, 94.12193, 2, new List<FlashLFQ.ProteinGroup> { pg });
    Identification id4 = new Identification(mzml, "EGFQVADGPLYR", "EGFQVADGPLYR", 1350.65681, 94.05811, 2, new List<FlashLFQ.ProteinGroup> { pg });
    FlashLfqEngine engine = new FlashLfqEngine(new List<Identification> { id3, id4 }, normalize: true);
    // EDGAR: End of part required to avoid crash

    //generate toml
    Console.WriteLine("generating toml with {0} key-value pairs", args[1]);
    var tomlData = Toml.Create();
    for (int i = 0; i < keyValuePairCount; i++)
    {
        tomlData.Add(i.ToString(), i);
    }

    //write toml
    Console.WriteLine("writing toml file {0}", args[0]);
    Stopwatch stopwatch = Stopwatch.StartNew();
    Toml.WriteFile(tomlData, args[0]);
    stopwatch.Stop();
    Console.WriteLine("Time elapsed for toml write: {0}\n", stopwatch.ElapsedMilliseconds);

    //read file
    Console.WriteLine("reading toml file {0}", args[0]);
    stopwatch = Stopwatch.StartNew();
    var tomlRead = Toml.ReadFile(args[0]);
    stopwatch.Stop();
    Console.WriteLine("Time elapsed for toml read: {0}\n", stopwatch.ElapsedMilliseconds);

    //read mzml file
    Console.WriteLine("reading mzml file {0}", args[2]);
    stopwatch = Stopwatch.StartNew();
    var msData = Mzml.LoadAllStaticData(args[2]);
    stopwatch.Stop();
    Console.WriteLine("Time elapsed for mzML read: {0}\n", stopwatch.ElapsedMilliseconds);

    //write mzml file
    Console.WriteLine("writing mzml file {0}", args[3]);
    stopwatch = Stopwatch.StartNew();
    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(msData, args[3], false);
    stopwatch.Stop();
    Console.WriteLine("Time elapsed for mzML write: {0}", stopwatch.ElapsedMilliseconds);
}
/// <summary>
/// Runs the calibration task over every spectra file in <paramref name="currentRawFileList"/>, processing
/// the files in parallel. For each file it loads the spectra, searches for calibration data points, runs
/// the <c>CalibrationEngine</c>, re-searches to measure the post-calibration error spread, and writes a
/// "-calib.toml" file with suggested tolerances plus the calibrated "-calib.mzml" file.
/// Files without enough calibration data points are skipped with a warning.
/// </summary>
/// <param name="OutputFolder">Folder that receives the calibrated spectra and toml files.</param>
/// <param name="dbFilenameList">Protein databases to load (with reversed decoys).</param>
/// <param name="currentRawFileList">Paths of the spectra files to calibrate.</param>
/// <param name="taskId">Task identifier; used as the first element of every nested status/progress id list.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The task results, whose <c>newSpectra</c> lists the calibrated files that were written.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
{
    myTaskResults = new MyTaskResults(this)
    {
        newSpectra = new List<string>()
    };

    Status("Loading modifications...", new List<string> { taskId });
    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
    List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
    List<string> localizeableModificationTypes = CommonParameters.LocalizeAll ? GlobalVariables.AllModTypesKnown.ToList() : CommonParameters.ListOfModTypesLocalize.ToList();

    Status("Loading proteins...", new List<string> { taskId });
    var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, UsefulProteomicsDatabases.DecoyType.Reverse, localizeableModificationTypes, b.IsContaminant, out Dictionary<string, Modification> um)).ToList();

    proseCreatedWhileRunning.Append("The following calibration settings were used: ");
    proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    if (CommonParameters.DigestionParams.MaxPeptideLength == null)
    {
        proseCreatedWhileRunning.Append("maximum peptide length = unspecified; ");
    }
    else
    {
        proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    }
    proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
    proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
    proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + " Da. ");
    proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // lock1 serialises file loading; resultsLock guards the shared result list, which is appended to
    // from several Parallel.For workers (List<T> is not safe for concurrent writes).
    object lock1 = new object();
    object resultsLock = new object();

    ParallelOptions parallelOptions = new ParallelOptions();
    if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
    {
        parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
    }

    var myFileManager = new MyFileManager(true);

    Status("Calibrating...", new List<string> { taskId });

    Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
    {
        var originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
        var originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
        ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
        string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + "-calib.mzml");

        IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile;

        Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files" });

        // only load one file at a time
        lock (lock1)
        {
            myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters.TopNpeaks, CommonParameters.MinRatio, CommonParameters.TrimMs1Peaks, CommonParameters.TrimMsMsPeaks);
        }

        Status("Acquiring calibration data points...", new List<string> { taskId, "Individual Spectra Files" });

        // get datapoints to fit calibration function to
        var acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

        // enough data points to calibrate with? Checked before any statistics are computed so that
        // an unusable file is skipped with a warning rather than failing inside the statistics.
        if (acquisitionResults.Item2 == null)
        {
            Warn("Could not find any datapoints to calibrate with!");
            return;
        }

        int ms1DatapointCount = acquisitionResults.Item2.Ms1List.Count;
        int ms2DatapointCount = acquisitionResults.Item2.Ms2List.Count;
        if (ms1DatapointCount < 4 || ms2DatapointCount < 4)
        {
            // warn only about the level(s) that are actually short of datapoints
            if (ms1DatapointCount < 4)
            {
                Warn("Could not find enough MS1 datapoints to calibrate (" + ms1DatapointCount + " found)");
            }
            if (ms2DatapointCount < 4)
            {
                Warn("Could not find enough MS2 datapoints to calibrate (" + ms2DatapointCount + " found)");
            }
            return;
        }

        // stats before calibration: PSM count and interquartile range of the ppm errors
        int prevPsmCount = acquisitionResults.Item1.Count;
        var preCalibrationPrecursorErrors = acquisitionResults.Item1.Select(p => (p.ScanPrecursorMass - p.PeptideMonisotopicMass.Value) / p.PeptideMonisotopicMass.Value * 1e6).ToList();
        double preCalibrationPrecursorIqr = Statistics.InterquartileRange(preCalibrationPrecursorErrors);

        var preCalibrationProductErrors = acquisitionResults.Item1.SelectMany(p => p.ProductMassErrorPpm.SelectMany(v => v.Value)).ToList();
        double preCalibrationProductIqr = Statistics.InterquartileRange(preCalibrationProductErrors);

        // generate calibration function and shift data points
        Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
        new CalibrationEngine(myMsDataFile, acquisitionResults.Item2, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }).Run();
        myFileManager.DoneWithFile(originalUncalibratedFilePath);

        // do another search to evaluate calibration results
        Status("Post-calibration search...", new List<string> { taskId, "Individual Spectra Files" });
        acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

        // stats after calibration
        // NOTE(review): unlike the pre-calibration computation above, this one does not use '.Value'
        // on PeptideMonisotopicMass - confirm whether the two are meant to differ.
        int postCalibrationPsmCount = acquisitionResults.Item1.Count;
        var postCalibrationPrecursorErrors = acquisitionResults.Item1.Select(p => (p.ScanPrecursorMass - p.PeptideMonisotopicMass) / p.PeptideMonisotopicMass * 1e6).ToList();
        double postCalibrationPrecursorIqr = Statistics.InterquartileRange(postCalibrationPrecursorErrors);

        var postCalibrationProductErrors = acquisitionResults.Item1.SelectMany(p => p.ProductMassErrorPpm.SelectMany(v => v.Value)).ToList();
        double postCalibrationProductIqr = Statistics.InterquartileRange(postCalibrationProductErrors);

        // did the data improve? (not used for anything yet...)
        bool improvement = ImprovGlobal(preCalibrationPrecursorIqr, preCalibrationProductIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorIqr, postCalibrationProductIqr);

        // write suggested tolerances for this file (4 * interquartile range of the post-calibration errors)
        var tomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + "-calib.toml");
        FileSpecificTolerances f = new FileSpecificTolerances
        {
            PrecursorMassTolerance = new PpmTolerance(4.0 * postCalibrationPrecursorIqr),
            ProductMassTolerance = new PpmTolerance(4.0 * postCalibrationProductIqr)
        };
        Toml.WriteFile(f, tomlFileName, tomlConfig);
        SucessfullyFinishedWritingFile(tomlFileName, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });

        // write the calibrated MZML file
        MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);

        // all done
        SucessfullyFinishedWritingFile(calibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });

        // the result list is shared between workers, so appends must be serialised
        lock (resultsLock)
        {
            myTaskResults.newSpectra.Add(calibratedFilePath);
        }

        ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }));
    });

    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    return (myTaskResults);
}