Example #1
0
        /// <summary>
        /// Command-line entry point. Parses the options, collects the input file(s), and for each file:
        /// loads the mzML, writes the analysis settings file, generates the SwaMe and Prognosticator
        /// metrics and writes them out as an mzQC file.
        /// Any exception is logged and the process exits with code 1; otherwise it exits with code 0.
        /// </summary>
        /// <param name="args">Raw command-line arguments, parsed into an <c>Options</c> instance.</param>
        static void Main(string[] args)
        {
            try
            {
                Parser.Default.ParseArguments<Options>(args).WithParsed(options =>
                {
                    if (options.Verbose)
                    {
                        SetVerboseLogging();
                    }
                    bool combine = options.Combine;
                    // "HH" is the 24-hour clock. The 12-hour "hh" without an AM/PM designator made a run at
                    // 01:00 and a run at 13:00 produce the same stamp (and sort incorrectly).
                    string dateTime          = DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss");
                    List<string> inputFiles  = new List<string>();

                    if (options.LoadFromDirectory == true)//multiple files
                    {
                        var directoryPath = Path.GetDirectoryName(options.InputFile);
                        DirectoryInfo di  = new DirectoryInfo(directoryPath);
                        Logger.Info("Attempting to load files with the extension .mzml in the following directory: {0}", directoryPath);
                        foreach (var file in di.GetFiles("*.mzml", SearchOption.TopDirectoryOnly))
                        {
                            inputFiles.Add(file.FullName);
                        }

                        if (inputFiles.Count == 0)
                        {
                            Logger.Error("Unable to locate any MZML files in {0} directory", directoryPath);
                            throw new FileNotFoundException();
                        }
                    }
                    else //single file
                    {
                        inputFiles.Add(options.InputFile);
                    }

                    // The division count does not depend on the file, so validate it once, before any output
                    // directory is created. The accepted range now matches the error message (1 - 100 inclusive).
                    if (options.Division < 1 || options.Division > 100)
                    {
                        Logger.Error("Number of divisions must be within the range 1 - 100. You have input: {0}", options.Division);
                        throw new ArgumentOutOfRangeException();
                    }
                    int division = options.Division;

                    // iRT handling is enabled whenever an iRT file was supplied.
                    bool irt = !String.IsNullOrEmpty(options.IRTFile);

                    foreach (string inputFilePath in inputFiles)
                    {
                        //saving whether its the last file or not, so if we need to combine all the files in the end, we know when the end is.
                        bool lastFile = inputFilePath == inputFiles.Last();
                        string fileSpecificDirectory = DirectoryCreator.CreateOutputDirectory(inputFilePath, dateTime);

                        Logger.Info("Loading file: {0}", inputFilePath);
                        Stopwatch sw = new Stopwatch();
                        sw.Start();

                        MzmlParser.MzmlReader mzmlParser = new MzmlParser.MzmlReader
                        {
                            ParseBinaryData = options.ParseBinaryData ?? true,
                            Threading       = options.Threading ?? true,
                            MaxQueueSize    = options.MaxQueueSize,
                            MaxThreads      = options.MaxThreads
                        };

                        CheckFileIsReadableOrComplain(inputFilePath);

                        AnalysisSettings analysisSettings = new AnalysisSettings()
                        {
                            MassTolerance      = options.MassTolerance,
                            RtTolerance        = options.RtTolerance,
                            IrtMinIntensity    = options.IrtMinIntensity,
                            IrtMinPeptides     = options.IrtMinTransitions,
                            IrtMassTolerance   = options.IrtMassTolerance,
                            CacheSpectraToDisk = options.Cache,
                            MinimumIntensity   = options.MinimumIntensity,
                            RunEndTime         = options.RunEndTime
                        };

                        if (irt)
                        {
                            // The comparison is already case-insensitive, so no ToLower() copy is needed.
                            // NOTE(review): any other extension leaves IrtLibrary unset while irt stays true.
                            if (options.IRTFile.EndsWith("traml", StringComparison.InvariantCultureIgnoreCase))
                            {
                                TraMLReader traMLReader     = new TraMLReader();
                                analysisSettings.IrtLibrary = traMLReader.LoadLibrary(options.IRTFile);
                            }
                            else if (options.IRTFile.EndsWith("tsv", StringComparison.InvariantCultureIgnoreCase) || options.IRTFile.EndsWith("csv", StringComparison.InvariantCultureIgnoreCase))
                            {
                                SVReader svReader           = new SVReader();
                                analysisSettings.IrtLibrary = svReader.LoadLibrary(options.IRTFile);
                            }
                        }
                        MzmlParser.Run run            = mzmlParser.LoadMzml(inputFilePath, analysisSettings);
                        AnalysisSettingsFileWriter Aw = new AnalysisSettingsFileWriter();
                        if (inputFiles.Count > 1 && lastFile)//multiple files and this is the last
                        {
                            Aw.WriteASFile(run, dateTime, inputFiles);
                        }
                        else //a single file, or not yet the last of several
                        {
                            Aw.WriteASFile(run, dateTime, inputFilePath);
                        }

                        Logger.Info("Generating metrics...");
                        var swameMetrics = new SwaMe.MetricGenerator().GenerateMetrics(run, division, inputFilePath, irt, combine, lastFile, dateTime);
                        var progMetrics  = new Prognosticator.MetricGenerator().GenerateMetrics(run);

                        var metrics       = swameMetrics.Union(progMetrics).ToDictionary(k => k.Key, v => v.Value);
                        string[] mzQCName = { dateTime, Path.GetFileNameWithoutExtension(inputFilePath), "mzQC.json" };
                        // The mzQC file name is relative, so it is written into the file-specific output directory.
                        Directory.SetCurrentDirectory(fileSpecificDirectory);
                        new MzqcGenerator.MzqcWriter().BuildMzqcAndWrite(string.Join("_", mzQCName), run, metrics, inputFilePath);
                        Logger.Info("Generated metrics in {0} seconds", Convert.ToInt32(sw.Elapsed.TotalSeconds));

                        if (analysisSettings.CacheSpectraToDisk)
                        {
                            Logger.Info("Deleting temp files...");
                            mzmlParser.DeleteTempFiles(run);
                        }
                        Logger.Info("Done!");
                    }
                });
            }
            catch (Exception ex)
            {
                Logger.Error("An unexpected error occured:");
                Logger.Error(ex.Message);
                Logger.Error(ex.StackTrace);
                LogManager.Shutdown();
                Environment.Exit(1);
            }
            LogManager.Shutdown();
            Environment.Exit(0);
        }
Example #2
0
        /// <summary>
        /// Decodes the base64 intensity and m/z arrays of a scan and derives the per-scan values from them:
        /// the filtered spectrum points, the predicted singly charged proportion, a fallback total ion
        /// current, the isolation window boundaries, the density and the base peak. The scan is then added
        /// to the run and the run's base peak list is updated.
        /// </summary>
        /// <param name="scan">Scan plus its temporary base64 payloads; its <c>Scan</c> member is populated here.</param>
        /// <param name="run">Run that receives the scan, the base peaks and the missing-scan count.</param>
        /// <param name="threading">When true, the countdown event <c>cde</c> is signalled once the scan is processed.</param>
        /// <param name="irt">When true, iRT peptide candidates are searched for in the spectrum.</param>
        private static void ParseBase64Data(ScanAndTempProperties scan, Run run, bool threading, bool irt)
        {
            float[] intensities = ExtractFloatArray(scan.Base64IntensityArray, scan.IntensityZlibCompressed, scan.IntensityBitLength);
            float[] mzs         = ExtractFloatArray(scan.Base64MzArray, scan.MzZlibCompressed, scan.MzBitLength);

            if (intensities.Length == 0)
            {
                intensities = FillZeroArray(intensities);
                mzs         = FillZeroArray(mzs);
                logger.Debug("Empty binary array for a MS{0} scan in cycle number: {1}. The empty scans have been filled with zero values.", scan.Scan.MsLevel, scan.Scan.Cycle);

                run.MissingScans++;
            }
            // Keep only the points at or above the configured minimum intensity.
            var spectrum = intensities.Select((x, i) => new SpectrumPoint(x, mzs[i], (float)scan.Scan.ScanStartTime)).Where(x => x.Intensity >= run.AnalysisSettings.MinimumIntensity).ToList();


            //Predicted singly charged proportion:

            //The theory is that an M and M+1 pair are singly charged so we are very simply just looking for  occurences where two ions are 1 mz apart (+-massTolerance)

            //We therefore create an array cusums that accumulates the difference between ions, so for every ion we calculate the distance between that ion
            //and the previous and add that to each of the previous ions' cusum of differences. If the cusum of an ion overshoots 1 +massTolerance, we stop adding to it, if it reaches our mark we count it and stop adding to it

            List <int> indexes = new List <int>();

            float[] cusums      = new float[mzs.Length];
            int     movingPoint = 0;
            double  minimum     = 1 - 0.001;
            double  maximum     = 1 + 0.001;

            for (int i = 1; i < mzs.Length; i++)
            {
                float distance         = mzs[i] - mzs[i - 1];
                bool  matchedWithLower = false;
                for (int ii = movingPoint; ii < i; ii++)
                {
                    cusums[ii] += distance;
                    if (cusums[ii] < minimum)
                    {
                        continue;
                    }
                    else if (cusums[ii] > minimum && cusums[ii] < maximum)
                    {
                        if (!matchedWithLower)//This is to try and minimise false positives where for example if you have an array: 351.14, 351.15, 352.14 all three get chosen.
                        {
                            indexes.Add(i);
                            indexes.Add(movingPoint);
                        }
                        movingPoint     += 1;
                        matchedWithLower = true;
                        continue;
                    }
                    else if (cusums[ii] > maximum)
                    {
                        movingPoint += 1;
                    }
                }
            }
            int distinct = indexes.Distinct().Count();
            int len      = mzs.Length;

            scan.Scan.ProportionChargeStateOne = (double)distinct / (double)len;

            // No TIC was read for this scan: fall back to the sum of the intensities and flag it.
            if (scan.Scan.TotalIonCurrent == 0)
            {
                scan.Scan.TotalIonCurrent = intensities.Sum();
                TicNotFound = true;
            }
            scan.Scan.Spectrum = new Spectrum()
            {
                SpectrumPoints = spectrum
            };
            scan.Scan.IsolationWindowLowerBoundary = scan.Scan.IsolationWindowTargetMz - scan.Scan.IsolationWindowLowerOffset;
            scan.Scan.IsolationWindowUpperBoundary = scan.Scan.IsolationWindowTargetMz + scan.Scan.IsolationWindowUpperOffset;

            scan.Scan.Density = spectrum.Count;

            // The base peak is located once and only when there is data: Max() throws on an empty sequence,
            // so it must not be evaluated before the emptiness check.
            bool   hasIntensities    = intensities.Length > 0;
            float  basepeakIntensity = 0;
            double mz                = 0;
            if (hasIntensities)
            {
                basepeakIntensity = intensities.Max();
                float basePeakMz  = mzs[Array.IndexOf(intensities, basepeakIntensity)];

                scan.Scan.BasePeakIntensity = basepeakIntensity;
                scan.Scan.BasePeakMz        = basePeakMz;
                mz                          = basePeakMz;
            }
            AddScanToRun(scan.Scan, run);

            //Extract info for Basepeak chromatograms
            if (hasIntensities)
            {
                if (!run.BasePeaks.Any(x => Math.Abs(x.Mz - mz) < run.AnalysisSettings.MassTolerance))//If a basepeak with this mz doesn't exist yet add it
                {
                    BasePeak bp = new BasePeak(mz, scan.Scan.ScanStartTime, basepeakIntensity);
                    run.BasePeaks.Add(bp);
                }
                else //we do have a match, now lets figure out if they fall within the rtTolerance
                {
                    //find out which basepeak
                    foreach (BasePeak thisbp in run.BasePeaks.Where(x => Math.Abs(x.Mz - mz) < run.AnalysisSettings.MassTolerance))
                    {
                        //an RT within tolerance means this is part of a previous basepeak, or at least considered to be
                        bool found = thisbp.BpkRTs.Any(knownRt => Math.Abs(knownRt - scan.Scan.ScanStartTime) < run.AnalysisSettings.RtTolerance);
                        if (!found)//This is considered to be a new instance
                        {
                            thisbp.BpkRTs.Add(scan.Scan.ScanStartTime);
                            thisbp.Intensities.Add(basepeakIntensity);
                        }
                    }
                }
            }

            if (irt)
            {
                FindIrtPeptideCandidates(scan, run, spectrum);
            }

            if (threading)
            {
                cde.Signal();
            }
        }
Example #3
0
 /// <summary>
 /// Derives the distinct (lower boundary, upper boundary) isolation windows from the run's MS2 scans,
 /// stores them on the run and logs how many there are together with the narrowest and widest width.
 /// </summary>
 /// <param name="run">Run whose <c>Ms2Scans</c> are read and whose <c>IsolationWindows</c> is assigned.</param>
 private void FindMs2IsolationWindows(Run run)
 {
     run.IsolationWindows = run.Ms2Scans.Select(x => (x.IsolationWindowTargetMz - x.IsolationWindowLowerOffset, x.IsolationWindowTargetMz + x.IsolationWindowUpperOffset)).Distinct().ToList();

     // Min()/Max() throw on an empty sequence, and log arguments are evaluated even when debug logging
     // is off, so a run without MS2 scans must not reach them.
     if (run.IsolationWindows.Count == 0)
     {
         logger.Debug("0 isolation windows detected");
         return;
     }

     var widths = run.IsolationWindows.Select(x => x.Item2 - x.Item1).ToList();
     logger.Debug("{0} isolation windows detected: min {1} max {2}", run.IsolationWindows.Count, widths.Min(), widths.Max());
 }
Example #4
0
        /// <summary>
        /// Reads one spectrum element from the reader: collects the scan's cvParam values and binary data,
        /// assigns its cycle number, then either parses the binary data (inline or on the thread pool) or
        /// adds the scan to the run directly. Reading stops on the closing <c>spectrum</c> tag, which is
        /// where the reader is left positioned.
        /// </summary>
        /// <param name="reader">XML reader positioned on the spectrum start element.</param>
        /// <param name="run">Run being populated; its start time and last scan time are updated here.</param>
        /// <param name="irt">Passed through to <c>ParseBase64Data</c> to enable the iRT candidate search.</param>
        public void ReadSpectrum(XmlReader reader, Run run, bool irt)
        {
            ScanAndTempProperties scan = new ScanAndTempProperties(run.AnalysisSettings.CacheSpectraToDisk);

            //The cycle number is within a kvp string in the following format: "sample=1 period=1 cycle=1 experiment=1"
            //
            //This has only been tested on Sciex converted data
            //
            //Paul Brack 2019/04/03

            bool CycleInfoInID = false;

            string sourceFileType = run.SourceFileTypes[0];
            if (sourceFileType.EndsWith("wiff", StringComparison.InvariantCultureIgnoreCase) || sourceFileType.EndsWith("scan", StringComparison.InvariantCultureIgnoreCase))
            {
                string id = reader.GetAttribute("id");
                if (!string.IsNullOrEmpty(id))
                {
                    //Some wiffs don't have that info, so a missing or unparsable cycle token must not throw:
                    //the cycle is then worked out from the scan order further down.
                    string cycleToken = id.Split(' ').FirstOrDefault(x => x.Contains("cycle"));
                    int    parsedCycle;
                    if (cycleToken != null && int.TryParse(cycleToken.Split('=').Last(), NumberStyles.Integer, CultureInfo.InvariantCulture, out parsedCycle))
                    {
                        scan.Scan.Cycle = parsedCycle;
                        if (parsedCycle != 0)
                        {
                            CycleInfoInID = true;
                        }
                    }
                }
            }

            bool cvParamsRead = false;

            //The flag is tested before Read() so that no node beyond the closing spectrum tag is consumed.
            //Otherwise, in XML without whitespace between elements, the next spectrum's start tag is swallowed.
            while (!cvParamsRead && reader.Read())
            {
                if (reader.IsStartElement())
                {
                    if (reader.LocalName == "cvParam")
                    {
                        switch (reader.GetAttribute("accession"))
                        {
                        case "MS:1000511":
                            scan.Scan.MsLevel = int.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                            break;

                        case "MS:1000285":
                            scan.Scan.TotalIonCurrent = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                            break;

                        case "MS:1000016":
                            scan.Scan.ScanStartTime = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                            run.StartTime           = Math.Min(run.StartTime, scan.Scan.ScanStartTime);
                            run.LastScanTime        = Math.Max(run.LastScanTime, scan.Scan.ScanStartTime);//technically this is the starttime of the last scan not the completion time
                            break;

                        case "MS:1000829":
                            scan.Scan.IsolationWindowUpperOffset = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                            break;

                        case "MS:1000828":
                            scan.Scan.IsolationWindowLowerOffset = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                            break;

                        case "MS:1000827":
                            scan.Scan.IsolationWindowTargetMz = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                            break;
                        }
                    }
                    else if (reader.LocalName == "binaryDataArray")
                    {
                        GetBinaryData(reader, scan);
                    }
                    //No MS level seen yet: infer it from whether the referenced param group is the survey scan group.
                    if (scan.Scan.MsLevel == null && reader.LocalName == "referenceableParamGroupRef")
                    {
                        if (reader.GetAttribute("ref") == SurveyScanReferenceableParamGroupId)
                        {
                            scan.Scan.MsLevel = 1;
                        }
                        else
                        {
                            scan.Scan.MsLevel = 2;
                        }
                    }
                }
                else if (reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "spectrum")
                {
                    if (!CycleInfoInID)
                    {
                        if (scan.Scan.MsLevel == 1)
                        {
                            //every MS1 scan starts a new cycle
                            currentCycle++;
                            scan.Scan.Cycle = currentCycle;
                            MS1             = true;
                        }
                        //if there is an ms1, the other scans belong to the cycle it started:
                        else if (MS1)
                        {
                            scan.Scan.Cycle = currentCycle;
                        }
                        //if there is no ms1, a target mz that stops increasing starts a new cycle:
                        else
                        {
                            if (previousTargetMz < scan.Scan.IsolationWindowTargetMz)
                            {
                                scan.Scan.Cycle = currentCycle;
                            }
                            else
                            {
                                currentCycle++;
                                scan.Scan.Cycle = currentCycle;
                            }
                        }
                    }

                    previousTargetMz = scan.Scan.IsolationWindowTargetMz;

                    if (ParseBinaryData)
                    {
                        if (Threading)
                        {
                            cde.AddCount();
                            if (run.AnalysisSettings.CacheSpectraToDisk) //this option exists largely to restrict RAM use, so don't let queue get too big
                            {
                                while (cde.CurrentCount > MaxQueueSize)
                                {
                                    Thread.Sleep(1000);
                                }
                            }

                            ThreadPool.QueueUserWorkItem(state => ParseBase64Data(scan, run, Threading, irt));
                        }
                        else
                        {
                            ParseBase64Data(scan, run, Threading, irt);
                        }
                    }
                    else
                    {
                        AddScanToRun(scan.Scan, run);
                    }
                    cvParamsRead = true;
                }
            }
        }