/// <summary>
/// Command-line entry point. Parses the options, collects the input mzML file(s),
/// and for each file: loads it, writes the analysis-settings file, generates the
/// SwaMe and Prognosticator metrics and writes them to an mzQC JSON file.
/// The process exits with code 1 if any exception is thrown and 0 otherwise.
/// </summary>
/// <param name="args">Raw command-line arguments, parsed into <c>Options</c>.</param>
static void Main(string[] args)
{
    try
    {
        Parser.Default.ParseArguments<Options>(args).WithParsed(options =>
        {
            if (options.Verbose)
            {
                SetVerboseLogging();
            }

            bool combine = options.Combine;

            // "HH" is the 24-hour clock. The previous "hh" (12-hour, no AM/PM designator)
            // produced identical stamps for e.g. 03:15 and 15:15 on the same day.
            string dateTime = DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss");

            List<string> inputFiles = new List<string>();
            if (options.LoadFromDirectory == true) // multiple files
            {
                var directoryPath = Path.GetDirectoryName(options.InputFile);
                if (directoryPath == String.Empty)
                {
                    // A bare file name has no directory component; DirectoryInfo("") would throw.
                    // (A null result - root path or null input - is left alone and still fails as before.)
                    directoryPath = Directory.GetCurrentDirectory();
                }

                DirectoryInfo di = new DirectoryInfo(directoryPath);
                Logger.Info("Attempting to load files with the extension .mzml in the following directory: {0}", directoryPath);
                foreach (var file in di.GetFiles("*.mzml", SearchOption.TopDirectoryOnly))
                {
                    inputFiles.Add(file.FullName);
                }

                if (inputFiles.Count == 0)
                {
                    Logger.Error("Unable to locate any MZML files in {0} directory", directoryPath);
                    throw new FileNotFoundException();
                }
            }
            else // single file
            {
                inputFiles.Add(options.InputFile);
            }

            // Validate once, before any output directory is created. The accepted range now
            // matches the error message (1 - 100 inclusive); previously 100 was rejected.
            if (options.Division < 1 || options.Division > 100)
            {
                Logger.Error("Number of divisions must be within the range 1 - 100. You have input: {0}", options.Division);
                throw new ArgumentOutOfRangeException();
            }
            int division = options.Division;

            bool irt = !String.IsNullOrEmpty(options.IRTFile);

            for (int fileIndex = 0; fileIndex < inputFiles.Count; fileIndex++)
            {
                string inputFilePath = inputFiles[fileIndex];

                // Remember whether this is the last file so that combined output
                // can be produced once every file has been processed.
                bool lastFile = fileIndex == inputFiles.Count - 1;

                string fileSpecificDirectory = DirectoryCreator.CreateOutputDirectory(inputFilePath, dateTime);

                Logger.Info("Loading file: {0}", inputFilePath);
                Stopwatch sw = new Stopwatch();
                sw.Start();

                MzmlParser.MzmlReader mzmlParser = new MzmlParser.MzmlReader
                {
                    ParseBinaryData = options.ParseBinaryData ?? true,
                    Threading = options.Threading ?? true,
                    MaxQueueSize = options.MaxQueueSize,
                    MaxThreads = options.MaxThreads
                };

                CheckFileIsReadableOrComplain(inputFilePath);

                AnalysisSettings analysisSettings = new AnalysisSettings()
                {
                    MassTolerance = options.MassTolerance,
                    RtTolerance = options.RtTolerance,
                    IrtMinIntensity = options.IrtMinIntensity,
                    // NOTE(review): IrtMinPeptides is fed from IrtMinTransitions - confirm this mapping is intended.
                    IrtMinPeptides = options.IrtMinTransitions,
                    IrtMassTolerance = options.IrtMassTolerance,
                    CacheSpectraToDisk = options.Cache,
                    MinimumIntensity = options.MinimumIntensity,
                    RunEndTime = options.RunEndTime
                };

                if (irt)
                {
                    // The reader is chosen by file extension; the comparison is already
                    // case-insensitive, so no ToLower() is needed.
                    if (options.IRTFile.EndsWith("traml", StringComparison.InvariantCultureIgnoreCase))
                    {
                        TraMLReader traMLReader = new TraMLReader();
                        analysisSettings.IrtLibrary = traMLReader.LoadLibrary(options.IRTFile);
                    }
                    else if (options.IRTFile.EndsWith("tsv", StringComparison.InvariantCultureIgnoreCase)
                        || options.IRTFile.EndsWith("csv", StringComparison.InvariantCultureIgnoreCase))
                    {
                        SVReader svReader = new SVReader();
                        analysisSettings.IrtLibrary = svReader.LoadLibrary(options.IRTFile);
                    }
                    // NOTE(review): any other extension leaves IrtLibrary unset while irt stays true.
                }

                MzmlParser.Run run = mzmlParser.LoadMzml(inputFilePath, analysisSettings);

                AnalysisSettingsFileWriter Aw = new AnalysisSettingsFileWriter();
                if (inputFiles.Count > 1 && lastFile) // multiple files and this is the last one
                {
                    Aw.WriteASFile(run, dateTime, inputFiles);
                }
                else // a single file, or not yet the last of several
                {
                    Aw.WriteASFile(run, dateTime, inputFilePath);
                }

                Logger.Info("Generating metrics...");
                var swameMetrics = new SwaMe.MetricGenerator().GenerateMetrics(run, division, inputFilePath, irt, combine, lastFile, dateTime);
                var progMetrics = new Prognosticator.MetricGenerator().GenerateMetrics(run);
                var metrics = swameMetrics.Union(progMetrics).ToDictionary(k => k.Key, v => v.Value);

                string[] mzQCName = { dateTime, Path.GetFileNameWithoutExtension(inputFilePath), "mzQC.json" };

                // The mzQC file name is relative, so switch into the per-file output directory first.
                Directory.SetCurrentDirectory(fileSpecificDirectory);
                new MzqcGenerator.MzqcWriter().BuildMzqcAndWrite(string.Join("_", mzQCName), run, metrics, inputFilePath);
                Logger.Info("Generated metrics in {0} seconds", Convert.ToInt32(sw.Elapsed.TotalSeconds));

                if (analysisSettings.CacheSpectraToDisk)
                {
                    Logger.Info("Deleting temp files...");
                    mzmlParser.DeleteTempFiles(run);
                }

                Logger.Info("Done!");
            }
        });
    }
    catch (Exception ex)
    {
        // Top-level handler: log everything we know, flush the logs and signal failure to the shell.
        Logger.Error("An unexpected error occured:");
        Logger.Error(ex.Message);
        Logger.Error(ex.StackTrace);
        LogManager.Shutdown();
        Environment.Exit(1);
    }

    LogManager.Shutdown();
    Environment.Exit(0);
}
/// <summary>
/// Decodes the intensity and m/z arrays of one scan, fills in the scan's derived
/// properties (spectrum, predicted singly-charged proportion, total ion current,
/// isolation window boundaries, density, base peak), adds the scan to the run and
/// records its base peak in <c>run.BasePeaks</c>.
/// </summary>
/// <param name="scan">The scan together with its still-encoded binary arrays.</param>
/// <param name="run">The run that receives the scan and the base-peak bookkeeping.</param>
/// <param name="threading">When true, the shared countdown event is signalled once the scan is done.</param>
/// <param name="irt">When true, the spectrum is also searched for iRT peptide candidates.</param>
/// <remarks>
/// NOTE(review): this method is queued on the thread pool by ReadSpectrum and updates
/// run.MissingScans and run.BasePeaks without visible synchronisation - confirm that
/// these members are safe for concurrent use.
/// </remarks>
private static void ParseBase64Data(ScanAndTempProperties scan, Run run, bool threading, bool irt)
{
    float[] intensities = ExtractFloatArray(scan.Base64IntensityArray, scan.IntensityZlibCompressed, scan.IntensityBitLength);
    float[] mzs = ExtractFloatArray(scan.Base64MzArray, scan.MzZlibCompressed, scan.MzBitLength);

    if (intensities.Length == 0)
    {
        intensities = FillZeroArray(intensities);
        mzs = FillZeroArray(mzs);
        logger.Debug("Empty binary array for a MS{0} scan in cycle number: {1}. The empty scans have been filled with zero values.", scan.Scan.MsLevel, scan.Scan.Cycle);
        run.MissingScans++;
    }

    // Keep only the points at or above the configured minimum intensity.
    var spectrum = intensities
        .Select((x, i) => new SpectrumPoint(x, mzs[i], (float)scan.Scan.ScanStartTime))
        .Where(x => x.Intensity >= run.AnalysisSettings.MinimumIntensity)
        .ToList();

    // Predicted singly charged proportion:
    // The theory is that an M and M+1 pair are singly charged, so we simply look for occurrences
    // where two ions are 1 mz apart (+- a fixed 0.001 window).
    // cusums accumulates the differences between ions: for every ion we take the distance to the
    // previous ion and add it to the running sums of the earlier ions still under consideration.
    // Once a running sum overshoots the window we stop adding to it; if it lands inside the
    // window we count the pair and stop adding to it.
    List<int> indexes = new List<int>();
    float[] cusums = new float[mzs.Length];
    int movingPoint = 0;
    double minimum = 1 - 0.001;
    double maximum = 1 + 0.001;
    for (int i = 1; i < mzs.Length; i++)
    {
        float distance = mzs[i] - mzs[i - 1];
        bool matchedWithLower = false;
        for (int ii = movingPoint; ii < i; ii++)
        {
            cusums[ii] += distance;
            if (cusums[ii] < minimum)
            {
                continue;
            }
            else if (cusums[ii] > minimum && cusums[ii] < maximum)
            {
                // Only record the first match for this ion, to minimise false positives where
                // e.g. for 351.14, 351.15, 352.14 all three would otherwise get chosen.
                if (!matchedWithLower)
                {
                    indexes.Add(i);
                    indexes.Add(movingPoint);
                }
                movingPoint += 1;
                matchedWithLower = true;
                continue;
            }
            else if (cusums[ii] > maximum)
            {
                movingPoint += 1;
            }
        }
    }

    int distinct = indexes.Distinct().Count();
    int len = mzs.Length;
    // Avoid 0/0 (NaN) when there are no m/z values at all.
    scan.Scan.ProportionChargeStateOne = len > 0 ? (double)distinct / (double)len : 0;

    if (scan.Scan.TotalIonCurrent == 0)
    {
        // No TIC was recorded for this scan, so derive it from the intensities.
        scan.Scan.TotalIonCurrent = intensities.Sum();
        TicNotFound = true;
    }

    scan.Scan.Spectrum = new Spectrum() { SpectrumPoints = spectrum };
    scan.Scan.IsolationWindowLowerBoundary = scan.Scan.IsolationWindowTargetMz - scan.Scan.IsolationWindowLowerOffset;
    scan.Scan.IsolationWindowUpperBoundary = scan.Scan.IsolationWindowTargetMz + scan.Scan.IsolationWindowUpperOffset;
    scan.Scan.Density = spectrum.Count();

    // Find the base peak once. Max() throws on an empty array, so the emptiness guard has to
    // come before the first use (previously it came after, which left it ineffective).
    bool hasIntensities = intensities.Length > 0;
    float basepeakIntensity = 0;
    int maxIndex = -1;
    if (hasIntensities)
    {
        basepeakIntensity = intensities.Max();
        maxIndex = Array.IndexOf(intensities, basepeakIntensity);
        scan.Scan.BasePeakIntensity = basepeakIntensity;
        scan.Scan.BasePeakMz = mzs[maxIndex];
    }

    AddScanToRun(scan.Scan, run);

    // Extract info for basepeak chromatograms.
    if (hasIntensities)
    {
        double mz = mzs[maxIndex];
        var matchingBasePeaks = run.BasePeaks
            .Where(x => Math.Abs(x.Mz - mz) < run.AnalysisSettings.MassTolerance)
            .ToList();

        if (matchingBasePeaks.Count == 0)
        {
            // No base peak with this m/z exists yet, so add it.
            BasePeak bp = new BasePeak(mz, scan.Scan.ScanStartTime, basepeakIntensity);
            run.BasePeaks.Add(bp);
        }
        else
        {
            // The m/z matches; a retention time outside the RT tolerance of every recorded
            // time counts as a new instance of that base peak.
            foreach (BasePeak thisbp in matchingBasePeaks)
            {
                bool found = thisbp.BpkRTs.Any(rt => Math.Abs(rt - scan.Scan.ScanStartTime) < run.AnalysisSettings.RtTolerance);
                if (!found)
                {
                    thisbp.BpkRTs.Add(scan.Scan.ScanStartTime);
                    thisbp.Intensities.Add(basepeakIntensity);
                }
            }
        }
    }

    if (irt)
    {
        FindIrtPeptideCandidates(scan, run, spectrum);
    }

    if (threading)
    {
        // Tell the shared countdown event that this queued scan has finished.
        cde.Signal();
    }
}
/// <summary>
/// Derives the distinct MS2 isolation windows of a run, as (lower bound, upper bound)
/// pairs computed from each MS2 scan's target m/z and offsets, stores them in
/// <c>run.IsolationWindows</c> and logs how many were found.
/// </summary>
/// <param name="run">The run whose MS2 scans are inspected and whose window list is replaced.</param>
private void FindMs2IsolationWindows(Run run)
{
    run.IsolationWindows = run.Ms2Scans
        .Select(x => (x.IsolationWindowTargetMz - x.IsolationWindowLowerOffset, x.IsolationWindowTargetMz + x.IsolationWindowUpperOffset))
        .Distinct()
        .ToList();

    if (run.IsolationWindows.Count == 0)
    {
        // Min()/Max() throw on an empty sequence; a run without MS2 scans must not fail on a log line.
        logger.Debug("{0} isolation windows detected", run.IsolationWindows.Count);
        return;
    }

    // Window width = upper bound - lower bound.
    logger.Debug("{0} isolation windows detected: min {1} max {2}",
        run.IsolationWindows.Count,
        run.IsolationWindows.Min(x => x.Item2 - x.Item1),
        run.IsolationWindows.Max(x => x.Item2 - x.Item1));
}
/// <summary>
/// Reads one <c>spectrum</c> element from the mzML stream: collects the cvParam values
/// and binary arrays into a new scan, assigns the scan a cycle number and then either
/// parses its binary data (inline or on the thread pool) or adds the scan to the run as is.
/// Reading stops at the spectrum's end element.
/// </summary>
/// <param name="reader">XML reader positioned on the start of a spectrum element.</param>
/// <param name="run">The run being populated; its start and last-scan times are updated here.</param>
/// <param name="irt">Passed through to the binary-data parser.</param>
public void ReadSpectrum(XmlReader reader, Run run, bool irt)
{
    ScanAndTempProperties scan = new ScanAndTempProperties(run.AnalysisSettings.CacheSpectraToDisk);

    // The cycle number is within a kvp string in the following format:
    // "sample=1 period=1 cycle=1 experiment=1"
    // This has only been tested on Sciex converted data.
    // Paul Brack 2019/04/03
    bool CycleInfoInID = false;
    string sourceFileType = run.SourceFileTypes[0];
    if (sourceFileType.EndsWith("wiff", StringComparison.InvariantCultureIgnoreCase)
        || sourceFileType.EndsWith("scan", StringComparison.InvariantCultureIgnoreCase))
    {
        // Some wiffs don't carry the cycle in the id. Look the token up without throwing so
        // that those spectra fall through to the counting logic below.
        string id = reader.GetAttribute("id");
        string cycleToken = string.IsNullOrEmpty(id)
            ? null
            : id.Split(' ').FirstOrDefault(x => x.Contains("cycle"));

        if (!string.IsNullOrEmpty(cycleToken)
            && int.TryParse(cycleToken.Split('=').Last(), NumberStyles.Integer, CultureInfo.InvariantCulture, out int cycle))
        {
            scan.Scan.Cycle = cycle;
            if (cycle != 0)
            {
                CycleInfoInID = true;
            }
        }
    }

    bool cvParamsRead = false;

    // Test the flag BEFORE reading: with the operands the other way round the reader was
    // advanced one node past </spectrum> before the loop ended.
    while (!cvParamsRead && reader.Read())
    {
        if (reader.IsStartElement())
        {
            if (reader.LocalName == "cvParam")
            {
                switch (reader.GetAttribute("accession"))
                {
                    case "MS:1000511":
                        scan.Scan.MsLevel = int.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000285":
                        scan.Scan.TotalIonCurrent = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000016":
                        scan.Scan.ScanStartTime = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        run.StartTime = Math.Min(run.StartTime, scan.Scan.ScanStartTime);
                        // Technically this is the start time of the last scan, not its completion time.
                        run.LastScanTime = Math.Max(run.LastScanTime, scan.Scan.ScanStartTime);
                        break;
                    case "MS:1000829":
                        scan.Scan.IsolationWindowUpperOffset = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000828":
                        scan.Scan.IsolationWindowLowerOffset = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000827":
                        scan.Scan.IsolationWindowTargetMz = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                }
            }
            else if (reader.LocalName == "binaryDataArray")
            {
                GetBinaryData(reader, scan);
            }

            // No explicit MS level yet: infer it from the referenced param group
            // (the survey-scan group means MS1, anything else is treated as MS2).
            if (scan.Scan.MsLevel == null && reader.LocalName == "referenceableParamGroupRef")
            {
                if (reader.GetAttribute("ref") == SurveyScanReferenceableParamGroupId)
                {
                    scan.Scan.MsLevel = 1;
                }
                else
                {
                    scan.Scan.MsLevel = 2;
                }
            }
        }
        else if (reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "spectrum")
        {
            if (!CycleInfoInID)
            {
                if (scan.Scan.MsLevel == 1)
                {
                    // Every MS1 scan starts a new cycle.
                    currentCycle++;
                    scan.Scan.Cycle = currentCycle;
                    MS1 = true;
                }
                else if (MS1)
                {
                    // An MS1 scan has been seen: this scan belongs to the cycle it opened.
                    scan.Scan.Cycle = currentCycle;
                }
                else
                {
                    // No MS1 scan seen: a target m/z that does not increase starts a new cycle.
                    if (previousTargetMz < scan.Scan.IsolationWindowTargetMz)
                    {
                        scan.Scan.Cycle = currentCycle;
                    }
                    else
                    {
                        currentCycle++;
                        scan.Scan.Cycle = currentCycle;
                    }
                }
            }

            previousTargetMz = scan.Scan.IsolationWindowTargetMz;

            if (ParseBinaryData)
            {
                if (Threading)
                {
                    cde.AddCount();
                    // Disk caching exists largely to restrict RAM use, so don't let the queue get too big.
                    if (run.AnalysisSettings.CacheSpectraToDisk)
                    {
                        while (cde.CurrentCount > MaxQueueSize)
                        {
                            Thread.Sleep(1000);
                        }
                    }
                    ThreadPool.QueueUserWorkItem(state => ParseBase64Data(scan, run, Threading, irt));
                }
                else
                {
                    ParseBase64Data(scan, run, Threading, irt);
                }
            }
            else
            {
                AddScanToRun(scan.Scan, run);
            }

            cvParamsRead = true;
        }
    }
}