private void ParseAnalysisCDFFile(string directoryPath, DatasetFileInfo datasetFileInfo)
        {
            NetCDFReader.clsMSNetCdf netCDFReader = null;

            try
            {
                // Note: as of May 2016 this only works if you compile as x86 or if you enable "Prefer 32-bit" when compiling as AnyCPU
                // In contrast, XRawFileIO in clsThermoRawFileInfoScanner requires that "Prefer 32-bit" be disabled

                netCDFReader = new NetCDFReader.clsMSNetCdf();
                var success = netCDFReader.OpenMSCdfFile(Path.Combine(directoryPath, AGILENT_ANALYSIS_CDF_FILE));
                if (success)
                {
                    var scanCount = netCDFReader.GetScanCount();

                    if (scanCount > 0)
                    {
                        // Lookup the scan time of the final scan

                        if (netCDFReader.GetScanInfo(scanCount - 1, out var scanNumber,
                                                     out _, out var scanTime, out _, out _))
                        {
                            // Add 1 to scanNumber since the scan number is off by one in the CDF file
                            datasetFileInfo.ScanCount  = scanNumber + 1;
                            datasetFileInfo.AcqTimeEnd = datasetFileInfo.AcqTimeStart.Add(SecondsToTimeSpan(scanTime));
                        }
                    }
                    else
                    {
                        datasetFileInfo.ScanCount = 0;
                    }
                }
            }
            catch (Exception ex)
            {
                OnWarningEvent("Error in ParseAnalysisCDFFile: " + ex.Message);
            }
            finally
            {
                netCDFReader?.CloseMSCdfFile();
            }
        }
Beispiel #2
0
        private void ParseAnalysisCDFFile(string strFolderPath, clsDatasetFileInfo datasetFileInfo)
        {
            NetCDFReader.clsMSNetCdf objNETCDFReader = null;

            try
            {
                // Note: as of May 2016 this only works if you compile as x86 or with "Prefer 32-bit" enabled when compiling as AnyCPU

                objNETCDFReader = new NetCDFReader.clsMSNetCdf();
                var blnSuccess = objNETCDFReader.OpenMSCdfFile(Path.Combine(strFolderPath, AGILENT_ANALYSIS_CDF_FILE));
                if (blnSuccess)
                {
                    var intScanCount = objNETCDFReader.GetScanCount();

                    if (intScanCount > 0)
                    {
                        // Lookup the scan time of the final scan

                        if (objNETCDFReader.GetScanInfo(intScanCount - 1, out var intScanNumber,
                                                        out _, out var dblScanTime, out _, out _))
                        {
                            // Add 1 to intScanNumber since the scan number is off by one in the CDF file
                            datasetFileInfo.ScanCount  = intScanNumber + 1;
                            datasetFileInfo.AcqTimeEnd = datasetFileInfo.AcqTimeStart.Add(SecondsToTimeSpan(dblScanTime));
                        }
                    }
                    else
                    {
                        datasetFileInfo.ScanCount = 0;
                    }
                }
            }
            catch (Exception ex)
            {
                OnWarningEvent("Error in ParseAnalysisCDFFile: " + ex.Message);
            }
            finally
            {
                objNETCDFReader?.CloseMSCdfFile();
            }
        }
Beispiel #3
0
        public bool ExtractScanInfoFromMGFandCDF(
            string filePath,
            clsScanList scanList,
            clsSpectraCache spectraCache,
            DataOutput.clsDataOutput dataOutputHandler,
            bool keepRawSpectra,
            bool keepMSMSSpectra)
        {
            // Returns True if Success, False if failure
            // Note: This function assumes filePath exists
            //
            // This function can be used to read a pair of MGF and NetCDF files that contain MS/MS and MS-only parent ion scans, respectively
            // Typically, this will apply to LC-MS/MS analyses acquired using an Agilent mass spectrometer running DataAnalysis software
            // filePath can contain the path to the MGF or to the CDF file; the extension will be removed in order to determine the base file name,
            // then the two files will be looked for separately

            var scanTime = 0.0;

            var cdfReader = new NetCDFReader.clsMSNetCdf();
            var mgfReader = new MSDataFileReader.clsMGFFileReader();

            try
            {
                Console.Write("Reading CDF/MGF data files ");
                ReportMessage("Reading CDF/MGF data files");

                UpdateProgress(0, "Opening data file: " + Environment.NewLine + Path.GetFileName(filePath));

                // Obtain the full path to the file
                var mgfFileInfo          = new FileInfo(filePath);
                var mgfInputFilePathFull = mgfFileInfo.FullName;

                // Make sure the extension for mgfInputFilePathFull is .MGF
                mgfInputFilePathFull = Path.ChangeExtension(mgfInputFilePathFull, AGILENT_MSMS_FILE_EXTENSION);
                var cdfInputFilePathFull = Path.ChangeExtension(mgfInputFilePathFull, AGILENT_MS_FILE_EXTENSION);

                var datasetID  = mOptions.SICOptions.DatasetID;
                var sicOptions = mOptions.SICOptions;

                var success = UpdateDatasetFileStats(mgfFileInfo, datasetID);
                mDatasetFileInfo.ScanCount = 0;

                // Open a handle to each data file
                if (!cdfReader.OpenMSCdfFile(cdfInputFilePathFull))
                {
                    ReportError("Error opening input data file: " + cdfInputFilePathFull);
                    SetLocalErrorCode(clsMASIC.eMasicErrorCodes.InputFileAccessError);
                    return(false);
                }

                if (!mgfReader.OpenFile(mgfInputFilePathFull))
                {
                    ReportError("Error opening input data file: " + mgfInputFilePathFull);
                    SetLocalErrorCode(clsMASIC.eMasicErrorCodes.InputFileAccessError);
                    return(false);
                }

                var msScanCount = cdfReader.GetScanCount();
                mDatasetFileInfo.ScanCount = msScanCount;

                if (msScanCount <= 0)
                {
                    // No scans found
                    ReportError("No scans found in the input file: " + cdfInputFilePathFull);
                    SetLocalErrorCode(clsMASIC.eMasicErrorCodes.InputFileAccessError);
                    return(false);
                }

                // Reserve memory for all of the Survey Scan data
                scanList.Initialize();
                var scanCount = mOptions.SICOptions.ScanRangeCount;
                if (scanCount <= 0)
                {
                    scanCount = msScanCount * 3;
                }
                scanList.ReserveListCapacity(scanCount, Math.Min(msScanCount, scanCount / 3)); // Assume there are 2 frag scans for every survey scan
                mScanTracking.ReserveListCapacity(scanCount);
                spectraCache.SpectrumCount = Math.Max(spectraCache.SpectrumCount, scanCount);

                UpdateProgress("Reading CDF/MGF data (" + msScanCount.ToString() + " scans)" + Environment.NewLine + Path.GetFileName(filePath));
                ReportMessage("Reading CDF/MGF data; Total MS scan count: " + msScanCount.ToString());

                // Read all of the Survey scans from the CDF file
                // CDF files created by the Agilent XCT list the first scan number as 0; use scanNumberCorrection to correct for this
                var scanNumberCorrection = 0;
                for (var msScanIndex = 0; msScanIndex < msScanCount; msScanIndex++)
                {
                    success = cdfReader.GetScanInfo(msScanIndex, out var scanNumber, out var scanTotalIntensity, out scanTime, out _, out _);

                    if (msScanIndex == 0 && scanNumber == 0)
                    {
                        scanNumberCorrection = 1;
                    }

                    if (!success)
                    {
                        // Error reading CDF file
                        ReportError("Error obtaining data from CDF file: " + cdfInputFilePathFull);
                        SetLocalErrorCode(clsMASIC.eMasicErrorCodes.InputFileDataReadError);
                        return(false);
                    }

                    if (scanNumberCorrection > 0)
                    {
                        scanNumber += scanNumberCorrection;
                    }

                    if (mScanTracking.CheckScanInRange(scanNumber, scanTime, sicOptions))
                    {
                        var newSurveyScan = new clsScanInfo
                        {
                            ScanNumber        = scanNumber,
                            TotalIonIntensity = (float)scanTotalIntensity,    // Copy the Total Scan Intensity to .TotalIonIntensity
                            ScanHeaderText    = string.Empty,
                            ScanTypeName      = "MS",
                        };

                        if (mOptions.CDFTimeInSeconds)
                        {
                            newSurveyScan.ScanTime = (float)(scanTime / 60);
                        }
                        else
                        {
                            newSurveyScan.ScanTime = (float)scanTime;
                        }

                        // Survey scans typically lead to multiple parent ions; we do not record them here
                        newSurveyScan.FragScanInfo.ParentIonInfoIndex = -1;

                        scanList.SurveyScans.Add(newSurveyScan);

                        success = cdfReader.GetMassSpectrum(msScanIndex, out var mzData,
                                                            out var intensityData,
                                                            out var intIonCount, out _);

                        if (success && intIonCount > 0)
                        {
                            var msSpectrum = new clsMSSpectrum(scanNumber, mzData, intensityData, intIonCount);

                            double msDataResolution;

                            newSurveyScan.IonCount    = msSpectrum.IonCount;
                            newSurveyScan.IonCountRaw = newSurveyScan.IonCount;

                            // Find the base peak ion mass and intensity
                            newSurveyScan.BasePeakIonMZ = FindBasePeakIon(msSpectrum.IonsMZ,
                                                                          msSpectrum.IonsIntensity,
                                                                          out var basePeakIonIntensity,
                                                                          out var mzMin, out var mzMax);
                            newSurveyScan.BasePeakIonIntensity = basePeakIonIntensity;

                            // Determine the minimum positive intensity in this scan
                            newSurveyScan.MinimumPositiveIntensity = mPeakFinder.FindMinimumPositiveValue(msSpectrum.IonsIntensity, 0);

                            if (sicOptions.SICToleranceIsPPM)
                            {
                                // Define MSDataResolution based on the tolerance value that will be used at the lowest m/z in this spectrum, divided by COMPRESS_TOLERANCE_DIVISOR
                                // However, if the lowest m/z value is < 100, then use 100 m/z
                                if (mzMin < 100)
                                {
                                    msDataResolution = clsParentIonProcessing.GetParentIonToleranceDa(sicOptions, 100) /
                                                       sicOptions.CompressToleranceDivisorForPPM;
                                }
                                else
                                {
                                    msDataResolution = clsParentIonProcessing.GetParentIonToleranceDa(sicOptions, mzMin) /
                                                       sicOptions.CompressToleranceDivisorForPPM;
                                }
                            }
                            else
                            {
                                msDataResolution = sicOptions.SICTolerance / sicOptions.CompressToleranceDivisorForDa;
                            }

                            mScanTracking.ProcessAndStoreSpectrum(
                                newSurveyScan, this,
                                spectraCache, msSpectrum,
                                sicOptions.SICPeakFinderOptions.MassSpectraNoiseThresholdOptions,
                                clsMASIC.DISCARD_LOW_INTENSITY_MS_DATA_ON_LOAD,
                                sicOptions.CompressMSSpectraData,
                                msDataResolution,
                                keepRawSpectra);
                        }
                        else
                        {
                            newSurveyScan.IonCount    = 0;
                            newSurveyScan.IonCountRaw = 0;
                        }

                        // Note: Since we're reading all of the Survey Scan data, we cannot update .MasterScanOrder() at this time
                    }

                    // Note: We need to take msScanCount * 2 since we have to read two different files
                    if (msScanCount > 1)
                    {
                        UpdateProgress((short)(msScanIndex / (double)(msScanCount * 2 - 1) * 100));
                    }
                    else
                    {
                        UpdateProgress(0);
                    }

                    UpdateCacheStats(spectraCache);
                    if (mOptions.AbortProcessing)
                    {
                        scanList.ProcessingIncomplete = true;
                        break;
                    }

                    if (msScanIndex % 100 == 0)
                    {
                        ReportMessage("Reading MS scan index: " + msScanIndex.ToString());
                        Console.Write(".");
                    }
                }

                // Record the current memory usage (before we close the .CDF file)
                OnUpdateMemoryUsage();

                cdfReader.CloseMSCdfFile();

                // We loaded all of the survey scan data above
                // We can now initialize .MasterScanOrder()
                var lastSurveyScanIndex = 0;

                scanList.AddMasterScanEntry(clsScanList.eScanTypeConstants.SurveyScan, lastSurveyScanIndex);

                var surveyScansRecorded = new SortedSet <int>()
                {
                    lastSurveyScanIndex
                };

                // Reset scanNumberCorrection; we might also apply it to MS/MS data
                scanNumberCorrection = 0;

                // Now read the MS/MS data from the MGF file
                do
                {
                    var fragScanFound = mgfReader.ReadNextSpectrum(out var spectrumInfo);
                    if (!fragScanFound)
                    {
                        break;
                    }

                    mDatasetFileInfo.ScanCount += 1;

                    while (spectrumInfo.ScanNumber < scanList.SurveyScans[lastSurveyScanIndex].ScanNumber)
                    {
                        // The scan number for the current MS/MS spectrum is less than the last survey scan index scan number
                        // This can happen, due to oddities with combining scans when creating the .MGF file
                        // Need to decrement lastSurveyScanIndex until we find the appropriate survey scan

                        lastSurveyScanIndex -= 1;
                        if (lastSurveyScanIndex == 0)
                        {
                            break;
                        }
                    }

                    if (scanNumberCorrection == 0)
                    {
                        // See if udtSpectrumHeaderInfo.ScanNumberStart is equivalent to one of the survey scan numbers, yielding conflicting scan numbers
                        // If it is, then there is an indexing error in the .MGF file; this error was present in .MGF files generated with
                        // an older version of Agilent Chemstation.  These files typically have lines like ###MSMS: #13-29 instead of ###MSMS: #13/29/
                        // If this indexing error is found, then we'll set scanNumberCorrection = 1 and apply it to all subsequent MS/MS scans;
                        // we'll also need to correct prior MS/MS scans
                        for (var surveyScanIndex = lastSurveyScanIndex; surveyScanIndex < scanList.SurveyScans.Count; surveyScanIndex++)
                        {
                            if (scanList.SurveyScans[surveyScanIndex].ScanNumber == spectrumInfo.ScanNumber)
                            {
                                // Conflicting scan numbers were found
                                scanNumberCorrection = 1;

                                // Need to update prior MS/MS scans
                                foreach (var fragScan in scanList.FragScans)
                                {
                                    fragScan.ScanNumber += scanNumberCorrection;
                                    var scanTimeInterpolated = InterpolateRTandFragScanNumber(
                                        scanList.SurveyScans, 0, fragScan.ScanNumber, out var fragScanIterationOut);
                                    fragScan.FragScanInfo.FragScanNumber = fragScanIterationOut;

                                    fragScan.ScanTime = scanTimeInterpolated;
                                }

                                break;
                            }

                            if (scanList.SurveyScans[surveyScanIndex].ScanNumber > spectrumInfo.ScanNumber)
                            {
                                break;
                            }
                        }
                    }

                    if (scanNumberCorrection > 0)
                    {
                        spectrumInfo.ScanNumber    += scanNumberCorrection;
                        spectrumInfo.ScanNumberEnd += scanNumberCorrection;
                    }

                    scanTime = InterpolateRTandFragScanNumber(
                        scanList.SurveyScans, lastSurveyScanIndex, spectrumInfo.ScanNumber, out var fragScanIteration);

                    // Make sure this fragmentation scan isn't present yet in scanList.FragScans
                    // This can occur in Agilent .MGF files if the scan is listed both singly and grouped with other MS/MS scans
                    var validFragScan = true;
                    foreach (var fragScan in scanList.FragScans)
                    {
                        if (fragScan.ScanNumber == spectrumInfo.ScanNumber)
                        {
                            // Duplicate found
                            validFragScan = false;
                            break;
                        }
                    }

                    if (!(validFragScan && mScanTracking.CheckScanInRange(spectrumInfo.ScanNumber, scanTime, sicOptions)))
                    {
                        continue;
                    }

                    // See if lastSurveyScanIndex needs to be updated
                    // At the same time, populate .MasterScanOrder
                    while (lastSurveyScanIndex < scanList.SurveyScans.Count - 1 &&
                           spectrumInfo.ScanNumber > scanList.SurveyScans[lastSurveyScanIndex + 1].ScanNumber)
                    {
                        lastSurveyScanIndex += 1;

                        // Add the given SurveyScan to .MasterScanOrder, though only if it hasn't yet been added
                        if (!surveyScansRecorded.Contains(lastSurveyScanIndex))
                        {
                            surveyScansRecorded.Add(lastSurveyScanIndex);
                            scanList.AddMasterScanEntry(clsScanList.eScanTypeConstants.SurveyScan,
                                                        lastSurveyScanIndex);
                        }
                    }

                    scanList.AddMasterScanEntry(clsScanList.eScanTypeConstants.FragScan, scanList.FragScans.Count,
                                                spectrumInfo.ScanNumber, (float)scanTime);

                    var newFragScan = new clsScanInfo
                    {
                        ScanNumber     = spectrumInfo.ScanNumber,
                        ScanTime       = (float)scanTime,
                        ScanHeaderText = string.Empty,
                        ScanTypeName   = "MSn",
                    };

                    newFragScan.FragScanInfo.FragScanNumber = fragScanIteration;
                    newFragScan.FragScanInfo.MSLevel        = 2;
                    newFragScan.MRMScanInfo.MRMMassCount    = 0;

                    scanList.FragScans.Add(newFragScan);

                    var msSpectrum = new clsMSSpectrum(newFragScan.ScanNumber, spectrumInfo.MZList, spectrumInfo.IntensityList, spectrumInfo.DataCount);

                    if (spectrumInfo.DataCount > 0)
                    {
                        newFragScan.IonCount    = msSpectrum.IonCount;
                        newFragScan.IonCountRaw = newFragScan.IonCount;

                        // Find the base peak ion mass and intensity
                        newFragScan.BasePeakIonMZ = FindBasePeakIon(msSpectrum.IonsMZ, msSpectrum.IonsIntensity,
                                                                    out var basePeakIonIntensity,
                                                                    out _, out _);

                        newFragScan.BasePeakIonIntensity = basePeakIonIntensity;

                        // Compute the total scan intensity
                        newFragScan.TotalIonIntensity = 0;
                        for (var ionIndex = 0; ionIndex < newFragScan.IonCount; ionIndex++)
                        {
                            newFragScan.TotalIonIntensity += msSpectrum.IonsIntensity[ionIndex];
                        }

                        // Determine the minimum positive intensity in this scan
                        newFragScan.MinimumPositiveIntensity = mPeakFinder.FindMinimumPositiveValue(msSpectrum.IonsIntensity, 0);

                        var msDataResolution = mOptions.BinningOptions.BinSize / sicOptions.CompressToleranceDivisorForDa;
                        var keepRawSpectrum  = keepRawSpectra && keepMSMSSpectra;

                        mScanTracking.ProcessAndStoreSpectrum(
                            newFragScan, this,
                            spectraCache, msSpectrum,
                            sicOptions.SICPeakFinderOptions.MassSpectraNoiseThresholdOptions,
                            clsMASIC.DISCARD_LOW_INTENSITY_MSMS_DATA_ON_LOAD,
                            sicOptions.CompressMSMSSpectraData,
                            msDataResolution,
                            keepRawSpectrum);
                    }
                    else
                    {
                        newFragScan.IonCount          = 0;
                        newFragScan.IonCountRaw       = 0;
                        newFragScan.TotalIonIntensity = 0;
                    }

                    mParentIonProcessor.AddUpdateParentIons(scanList, lastSurveyScanIndex, spectrumInfo.ParentIonMZ,
                                                            scanList.FragScans.Count - 1, spectraCache, sicOptions);

                    // Note: We need to take msScanCount * 2, in addition to adding msScanCount to lastSurveyScanIndex, since we have to read two different files
                    if (msScanCount > 1)
                    {
                        UpdateProgress((short)((lastSurveyScanIndex + msScanCount) / (double)(msScanCount * 2 - 1) * 100));
                    }
                    else
                    {
                        UpdateProgress(0);
                    }

                    UpdateCacheStats(spectraCache);
                    if (mOptions.AbortProcessing)
                    {
                        scanList.ProcessingIncomplete = true;
                        break;
                    }

                    if (scanList.FragScans.Count % 100 == 0)
                    {
                        ReportMessage("Reading MSMS scan index: " + scanList.FragScans.Count);
                        Console.Write(".");
                    }
                }while (true);

                // Record the current memory usage (before we close the .MGF file)
                OnUpdateMemoryUsage();

                mgfReader.CloseFile();

                // Check for any other survey scans that need to be added to MasterScanOrder

                // See if lastSurveyScanIndex needs to be updated
                // At the same time, populate .MasterScanOrder
                while (lastSurveyScanIndex < scanList.SurveyScans.Count - 1)
                {
                    lastSurveyScanIndex += 1;

                    // Note that scanTime is the scan time of the most recent survey scan processed in the above Do loop, so it's not accurate
                    if (mScanTracking.CheckScanInRange(scanList.SurveyScans[lastSurveyScanIndex].ScanNumber, scanTime, sicOptions))
                    {
                        // Add the given SurveyScan to .MasterScanOrder, though only if it hasn't yet been added
                        if (!surveyScansRecorded.Contains(lastSurveyScanIndex))
                        {
                            surveyScansRecorded.Add(lastSurveyScanIndex);

                            scanList.AddMasterScanEntry(clsScanList.eScanTypeConstants.SurveyScan, lastSurveyScanIndex);
                        }
                    }
                }

                // Make sure that MasterScanOrder really is sorted by scan number
                ValidateMasterScanOrderSorting(scanList);

                // Now that all of the data has been read, write out to the scan stats file, in order of scan number
                for (var scanIndex = 0; scanIndex < scanList.MasterScanOrderCount; scanIndex++)
                {
                    var         eScanType = scanList.MasterScanOrder[scanIndex].ScanType;
                    clsScanInfo currentScan;

                    if (eScanType == clsScanList.eScanTypeConstants.SurveyScan)
                    {
                        // Survey scan
                        currentScan = scanList.SurveyScans[scanList.MasterScanOrder[scanIndex].ScanIndexPointer];
                    }
                    else
                    {
                        // Frag Scan
                        currentScan = scanList.FragScans[scanList.MasterScanOrder[scanIndex].ScanIndexPointer];
                    }

                    SaveScanStatEntry(dataOutputHandler.OutputFileHandles.ScanStats, eScanType, currentScan, sicOptions.DatasetID);
                }

                Console.WriteLine();

                scanList.SetListCapacityToCount();
                mScanTracking.SetListCapacityToCount();

                return(success);
            }
            catch (Exception ex)
            {
                ReportError("Error in ExtractScanInfoFromMGFandCDF", ex, clsMASIC.eMasicErrorCodes.InputFileDataReadError);
                return(false);
            }
        }