Ejemplo n.º 1
0
        /// <summary>
        /// Parse a single scan number, or a dash-separated scan number range, from a text value
        /// </summary>
        /// <remarks>
        /// ScanNumber and ScanNumberEnd are written to <paramref name="spectrumInfo"/>.
        /// SpectrumID and ScanCount are written to the mCurrentSpectrum field
        /// (SpectrumID is copied from mCurrentSpectrum.ScanNumber).
        /// A non-zero ScanNumberEnd that is already present is kept, unless a dash is found
        /// and the text after the dash is not numeric.
        /// </remarks>
        /// <param name="data">Single integer or two integers separated by a dash</param>
        /// <param name="spectrumInfo">Spectrum object that receives ScanNumber and ScanNumberEnd</param>
        /// <returns>True if the scan number was found, otherwise false</returns>
        private bool ExtractScanRange(string data, SpectrumInfo spectrumInfo)
        {
            var dashIndex = data.IndexOf('-');

            // A dash at position 0 is not treated as a range separator
            var hasRange = dashIndex > 0;

            string startText;
            string endText;

            if (hasRange)
            {
                startText = data.Substring(0, dashIndex).Trim();
                endText   = data.Substring(dashIndex + 1).Trim();
            }
            else
            {
                startText = data;
                endText   = null;
            }

            if (!IsNumber(startText))
            {
                // No usable scan number; leave the spectrum objects untouched
                return false;
            }

            spectrumInfo.ScanNumber = int.Parse(startText);

            if (hasRange && !IsNumber(endText))
            {
                // The text after the dash is not numeric; treat this as a single scan
                spectrumInfo.ScanNumberEnd = spectrumInfo.ScanNumber;
            }
            else if (spectrumInfo.ScanNumberEnd == 0)
            {
                // Only fill in the end scan when the caller has not already defined it
                spectrumInfo.ScanNumberEnd = hasRange ? int.Parse(endText) : spectrumInfo.ScanNumber;
            }

            mCurrentSpectrum.SpectrumID = mCurrentSpectrum.ScanNumber;

            var firstScan = spectrumInfo.ScanNumber;
            var lastScan  = spectrumInfo.ScanNumberEnd;

            // An end scan at or before the start scan counts as a single scan
            mCurrentSpectrum.ScanCount = firstScan >= lastScan ? 1 : lastScan - firstScan + 1;

            return true;
        }
        /// <summary>
        /// Read the next spectrum from a _dta.txt file
        /// </summary>
        /// <remarks>
        /// <para>
        /// Each spectrum starts with a header line that begins with CommentLineStartChar,
        /// followed by a line with the parent ion information, followed by the ion list
        /// (the latter two are parsed by ReadSingleSpectrum).
        /// </para>
        /// <para>
        /// If CombineIdenticalSpectra is true and the first parent ion charge of the spectrum is 2,
        /// the next header is examined: when its title ends with "3.dta" and otherwise matches the
        /// current title (ignoring the final 5 characters), charge 3 is stored as a second parent ion
        /// charge and the lines of that duplicate spectrum are skipped.
        /// </para>
        /// <para>
        /// Errors are reported via OnErrorEvent; in that case the method returns false and
        /// <paramref name="spectrumInfo"/> is a new, empty object.
        /// </para>
        /// </remarks>
        /// <param name="spectrumInfo">Output: the spectrum that was read (mCurrentSpectrum), or a new empty object on error</param>
        /// <returns>True if a spectrum is found, otherwise false</returns>
        public override bool ReadNextSpectrum(out SpectrumInfo spectrumInfo)
        {
            var spectrumFound = false;

            try
            {
                // Allocate a new spectrum object when ReadingAndStoringSpectra is true (or none exists yet);
                // otherwise reuse the existing instance
                if (ReadingAndStoringSpectra || mCurrentSpectrum is null)
                {
                    mCurrentSpectrum = new SpectrumInfoMsMsText();
                }
                else
                {
                    mCurrentSpectrum.Clear();
                }

                if (mFileReader is null)
                {
                    spectrumInfo  = new SpectrumInfo();
                    mErrorMessage = "Data file not currently open";
                    return false;
                }

                AddNewRecentFileText(string.Empty, true, false);
                var lastProgressUpdateLine = mInFileLineNumber;

                // Loop-invariant: string form of the comment character, used for the StartsWith checks
                var commentPrefix = CommentLineStartChar.ToString();

                while (!spectrumFound && mFileReader.Peek() > -1 && !mAbortProcessing)
                {
                    string lineIn;

                    if (!string.IsNullOrEmpty(mHeaderSaved))
                    {
                        // A header line was read ahead while processing the previous spectrum; use it now
                        lineIn       = mHeaderSaved;
                        mHeaderSaved = string.Empty;
                    }
                    else
                    {
                        lineIn = mFileReader.ReadLine();

                        if (lineIn != null)
                        {
                            // The + 2 presumably accounts for a two-character line terminator (CR LF)
                            mTotalBytesRead += lineIn.Length + 2;
                        }

                        mInFileLineNumber++;
                    }

                    // Only lines that start with the comment character (spectrum headers) are processed here;
                    // any other line is skipped
                    if (lineIn != null && lineIn.Trim().StartsWith(commentPrefix))
                    {
                        AddNewRecentFileText(lineIn);

                        mCurrentSpectrum.SpectrumTitleWithCommentChars = lineIn;
                        mCurrentSpectrum.SpectrumTitle = CleanupComment(lineIn, CommentLineStartChar, true);

                        ExtractScanInfoFromDtaHeader(mCurrentSpectrum.SpectrumTitle, out var scanNumberStart, out var scanNumberEnd, out var scanCount);
                        mCurrentSpectrum.ScanNumber    = scanNumberStart;
                        mCurrentSpectrum.ScanNumberEnd = scanNumberEnd;
                        mCurrentSpectrum.ScanCount     = scanCount;
                        mCurrentSpectrum.MSLevel       = 2;
                        mCurrentSpectrum.SpectrumID    = mCurrentSpectrum.ScanNumber;

                        // Read the next line, which should have the parent ion MH value and charge
                        if (mFileReader.Peek() > -1)
                        {
                            lineIn = mFileReader.ReadLine();
                        }
                        else
                        {
                            lineIn = string.Empty;
                        }

                        if (lineIn != null)
                        {
                            mTotalBytesRead += lineIn.Length + 2;
                        }

                        mInFileLineNumber++;

                        if (string.IsNullOrWhiteSpace(lineIn))
                        {
                            // Spectrum header is not followed by a parent ion value and charge; ignore the line
                        }
                        else
                        {
                            AddNewRecentFileText(lineIn);

                            // Parse the parent ion info and read the MsMs Data
                            spectrumFound = ReadSingleSpectrum(
                                mFileReader, lineIn,
                                out mCurrentMsMsDataList,
                                mCurrentSpectrum,
                                ref mInFileLineNumber,
                                ref lastProgressUpdateLine,
                                out var mostRecentLineIn);

                            if (spectrumFound)
                            {
                                mCurrentSpectrum.ClearMzAndIntensityData();

                                if (ReadTextDataOnly)
                                {
                                    // Do not parse the text data to populate .MzList and .IntensityList
                                    mCurrentSpectrum.PeaksCount = 0;
                                }
                                else
                                {
                                    try
                                    {
                                        ParseMsMsDataList(mCurrentMsMsDataList, out var mzList, out var intensityList);

                                        mCurrentSpectrum.PeaksCount = mzList.Count;
                                        mCurrentSpectrum.StoreIons(mzList, intensityList);

                                        mCurrentSpectrum.Validate(true, true);
                                    }
                                    catch (Exception ex)
                                    {
                                        // Report the problem instead of silently discarding it,
                                        // then skip this spectrum and continue with the next one
                                        OnErrorEvent("Error parsing the MS/MS data in ReadNextSpectrum", ex);
                                        mCurrentSpectrum.PeaksCount = 0;
                                        spectrumFound = false;
                                    }
                                }
                            }

                            if (spectrumFound && CombineIdenticalSpectra && mCurrentSpectrum.ParentIonCharges[0] == 2)
                            {
                                // See if the next spectrum is the identical data, but the charge is 3 (this is a common situation with .dta files prepared by Lcq_Dta)

                                lineIn = mostRecentLineIn;

                                if (string.IsNullOrWhiteSpace(lineIn) && mFileReader.Peek() > -1)
                                {
                                    // Read the next line
                                    lineIn = mFileReader.ReadLine();

                                    if (lineIn != null)
                                    {
                                        mTotalBytesRead += lineIn.Length + 2;
                                    }

                                    mInFileLineNumber++;
                                }

                                if (lineIn != null && lineIn.StartsWith(commentPrefix))
                                {
                                    // Save the header so the next call can process it, unless it proves to be a 3+ duplicate
                                    mHeaderSaved = lineIn;
                                    var compareTitle = CleanupComment(mHeaderSaved, CommentLineStartChar, true);
                                    var currentTitle = mCurrentSpectrum.SpectrumTitle ?? string.Empty;

                                    // Compare the titles after removing the final 5 characters (e.g. "3.dta").
                                    // The length check prevents Substring from throwing when the current title is shorter than 5 characters.
                                    if (currentTitle.Length >= 5 &&
                                        compareTitle.EndsWith("3.dta", StringComparison.OrdinalIgnoreCase) &&
                                        string.Equals(
                                            currentTitle.Substring(0, currentTitle.Length - 5),
                                            compareTitle.Substring(0, compareTitle.Length - 5),
                                            StringComparison.OrdinalIgnoreCase))
                                    {
                                        // Yes, the spectra match
                                        mCurrentSpectrum.ParentIonChargeCount = 2;
                                        mCurrentSpectrum.ParentIonCharges[1]  = 3;
                                        mCurrentSpectrum.ChargeIs2And3Plus    = true;

                                        mHeaderSaved = string.Empty;

                                        // Read the next set of lines until the next blank line or comment line is found
                                        while (mFileReader.Peek() > -1)
                                        {
                                            lineIn = mFileReader.ReadLine();
                                            mInFileLineNumber++;

                                            // See if lineIn is blank or starts with the comment character
                                            if (lineIn != null)
                                            {
                                                mTotalBytesRead += lineIn.Length + 2;

                                                var trimmedLine = lineIn.Trim();

                                                if (trimmedLine.Length == 0)
                                                {
                                                    break;
                                                }

                                                if (trimmedLine.StartsWith(commentPrefix))
                                                {
                                                    // Header of the following spectrum; keep it for the next call
                                                    mHeaderSaved = lineIn;
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                            else if (mostRecentLineIn != null && mostRecentLineIn.StartsWith(commentPrefix))
                            {
                                // ReadSingleSpectrum stopped on the next header line; keep it for the next iteration or call
                                mHeaderSaved = mostRecentLineIn;
                            }
                        } // End of parent ion line handling
                    }     // End of header line handling

                    if (mInFileLineNumber - lastProgressUpdateLine >= 250 || spectrumFound)
                    {
                        lastProgressUpdateLine = mInFileLineNumber;
                        UpdateStreamReaderProgress();
                    }
                }

                spectrumInfo = mCurrentSpectrum;

                if (spectrumFound)
                {
                    mScanCountRead++;

                    if (!ReadingAndStoringSpectra)
                    {
                        if (mInputFileStats.ScanCount < mScanCountRead)
                        {
                            mInputFileStats.ScanCount = mScanCountRead;
                        }

                        UpdateFileStats(mInputFileStats.ScanCount, spectrumInfo.ScanNumber, false);
                    }
                }
            }
            catch (Exception ex)
            {
                OnErrorEvent("Error in ReadNextSpectrum", ex);
                spectrumInfo = new SpectrumInfo();

                // The spectrum is replaced by an empty object, so do not report it as found
                spectrumFound = false;
            }

            return spectrumFound;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Read the next spectrum from a .mgf file
        /// </summary>
        /// <remarks>
        /// <para>
        /// Comment lines that start with CommentLineStartChar followed by LINE_START_MSMS are used to obtain the scan number(s).
        /// A line starting with LINE_START_BEGIN_IONS marks the start of a spectrum; the header lines that follow
        /// (LINE_START_PEPMASS, LINE_START_CHARGE, LINE_START_TITLE, LINE_START_RT, LINE_START_SCANS) are parsed
        /// until the first line that starts with a digit, which is the first entry of the ion list.
        /// </para>
        /// <para>
        /// If no scan number is found, the scan number is set to one more than the previously saved scan number
        /// (it may then be replaced by values extracted from the title line).
        /// </para>
        /// <para>
        /// Errors are reported via OnErrorEvent; in that case the method returns false and
        /// <paramref name="spectrumInfo"/> is a new, empty object.
        /// </para>
        /// </remarks>
        /// <param name="spectrumInfo">Output: the spectrum that was read (mCurrentSpectrum), or a new empty object on error</param>
        /// <returns>True if a spectrum is found, otherwise false</returns>
        public override bool ReadNextSpectrum(out SpectrumInfo spectrumInfo)
        {
            var sepChars = new[] { ' ', '\t' };

            var spectrumFound = false;

            try
            {
                // Allocate a new spectrum object when ReadingAndStoringSpectra is true (or none exists yet);
                // otherwise reuse the existing instance
                if (ReadingAndStoringSpectra || mCurrentSpectrum is null)
                {
                    mCurrentSpectrum = new SpectrumInfoMsMsText();
                }
                else
                {
                    mCurrentSpectrum.Clear();
                }

                var scanNumberFound = false;

                // Initialize mCurrentMsMsDataList
                if (mCurrentMsMsDataList is null)
                {
                    mCurrentMsMsDataList = new List<string>();
                }
                else
                {
                    mCurrentMsMsDataList.Clear();
                }

                if (mFileReader is null)
                {
                    spectrumInfo  = new SpectrumInfoMsMsText();
                    mErrorMessage = "Data file not currently open";
                    return false;
                }

                AddNewRecentFileText(string.Empty, true, false);

                mCurrentSpectrum.SpectrumTitleWithCommentChars = string.Empty;
                mCurrentSpectrum.SpectrumTitle = string.Empty;
                mCurrentSpectrum.MSLevel       = 2;

                var lastProgressUpdateLine = mInFileLineNumber;

                while (!spectrumFound && mFileReader.Peek() > -1 && !mAbortProcessing)
                {
                    var lineIn = mFileReader.ReadLine();

                    if (lineIn != null)
                    {
                        // The + 2 presumably accounts for a two-character line terminator (CR LF)
                        mTotalBytesRead += lineIn.Length + 2;
                    }

                    mInFileLineNumber++;

                    if (lineIn?.Trim().Length > 0)
                    {
                        AddNewRecentFileText(lineIn);
                        lineIn = lineIn.Trim();

                        // See if lineIn starts with the comment line start character (a pound sign, #)
                        if (lineIn.StartsWith(CommentLineStartChar.ToString()))
                        {
                            // Remove any comment characters at the start of lineIn
                            lineIn = lineIn.TrimStart(CommentLineStartChar).Trim();

                            // Look for LINE_START_MSMS in lineIn
                            // This will be present in MGF files created using Agilent's DataAnalysis software
                            if (lineIn.StartsWith(LINE_START_MSMS, StringComparison.OrdinalIgnoreCase))
                            {
                                lineIn = lineIn.Substring(LINE_START_MSMS.Length).Trim();

                                // Initialize these values
                                mCurrentSpectrum.ScanNumberEnd = 0;
                                mCurrentSpectrum.ScanCount     = 1;

                                // Remove the # sign in front of the scan number
                                lineIn = lineIn.TrimStart('#').Trim();

                                // Look for the / sign and remove any text following it
                                // For example,
                                // ###MS: 4458/4486/
                                // ###MSMS: 4459/4488/
                                // The / sign is used to indicate that several MS/MS scans were combined to make the given spectrum; we'll just keep the first one
                                var charIndex = lineIn.IndexOf('/');

                                if (charIndex > 0)
                                {
                                    string temp;

                                    if (charIndex < lineIn.Length - 1)
                                    {
                                        temp = lineIn.Substring(charIndex + 1).Trim();
                                    }
                                    else
                                    {
                                        temp = string.Empty;
                                    }

                                    lineIn = lineIn.Substring(0, charIndex).Trim();
                                    mCurrentSpectrum.ScanCount = 1;

                                    if (temp.Length > 0)
                                    {
                                        // Step through the remaining slash-separated values, incrementing ScanCount for each slash;
                                        // the last value is kept in temp and used as the end scan number
                                        while (true)
                                        {
                                            charIndex = temp.IndexOf('/');

                                            if (charIndex > 0)
                                            {
                                                mCurrentSpectrum.ScanCount++;

                                                if (charIndex < temp.Length - 1)
                                                {
                                                    temp = temp.Substring(charIndex + 1).Trim();
                                                }
                                                else
                                                {
                                                    // Trailing slash: keep the text in front of it
                                                    temp = temp.Substring(0, charIndex).Trim();
                                                    break;
                                                }
                                            }
                                            else
                                            {
                                                break;
                                            }
                                        }

                                        if (IsNumber(temp))
                                        {
                                            mCurrentSpectrum.ScanNumberEnd = int.Parse(temp);
                                        }
                                    }
                                }

                                scanNumberFound = ExtractScanRange(lineIn, mCurrentSpectrum);
                            }
                        }

                        // Line does not start with a comment character
                        // Look for LINE_START_BEGIN_IONS in lineIn
                        else if (lineIn.StartsWith(LINE_START_BEGIN_IONS, StringComparison.OrdinalIgnoreCase))
                        {
                            if (!scanNumberFound)
                            {
                                // Need to update scanNumberStart
                                // Set it to one more than mScanNumberStartSaved
                                mCurrentSpectrum.ScanNumber    = mScanNumberStartSaved + 1;
                                mCurrentSpectrum.ScanNumberEnd = mCurrentSpectrum.ScanNumber;
                                mCurrentSpectrum.SpectrumID    = mCurrentSpectrum.ScanNumber;
                                mCurrentSpectrum.ScanCount     = 1;
                            }

                            var parentIonFound = false;

                            // We have found an MS/MS scan
                            // Look for LINE_START_PEPMASS and LINE_START_CHARGE to determine the parent ion m/z and charge
                            while (mFileReader.Peek() > -1)
                            {
                                lineIn = mFileReader.ReadLine();
                                mInFileLineNumber++;

                                if (lineIn == null)
                                {
                                    continue;
                                }

                                mTotalBytesRead += lineIn.Length + 2;
                                AddNewRecentFileText(lineIn);

                                if (lineIn.Trim().Length == 0)
                                {
                                    continue;
                                }

                                lineIn = lineIn.Trim();
                                string[] splitLine;

                                if (lineIn.StartsWith(LINE_START_PEPMASS, StringComparison.OrdinalIgnoreCase))
                                {
                                    // This line defines the peptide mass as an m/z value
                                    // It may simply contain the m/z value, or it may also contain an intensity value
                                    // The two values will be separated by a space or a tab
                                    // We do not save the intensity value since it cannot be included in a .Dta file
                                    lineIn    = lineIn.Substring(LINE_START_PEPMASS.Length).Trim();
                                    splitLine = lineIn.Split(sepChars);

                                    if (splitLine.Length > 0 && IsNumber(splitLine[0]))
                                    {
                                        mCurrentSpectrum.ParentIonMZ = double.Parse(splitLine[0]);
                                        parentIonFound = true;
                                    }
                                    else
                                    {
                                        // Invalid LINE_START_PEPMASS Line
                                        // Ignore this entire scan
                                        break;
                                    }
                                }
                                else if (lineIn.StartsWith(LINE_START_CHARGE, StringComparison.OrdinalIgnoreCase))
                                {
                                    // This line defines the peptide charge
                                    // It may simply contain a single charge, like 1+ or 2+
                                    // It may also contain two charges, as in 2+ and 3+
                                    // Not all spectra in the MGF file will have a CHARGE= entry
                                    lineIn = lineIn.Substring(LINE_START_CHARGE.Length).Trim();

                                    // Remove any + signs in the line
                                    lineIn = lineIn.Replace("+", string.Empty);

                                    if (lineIn.IndexOf(' ') > 0)
                                    {
                                        // Multiple charges may be present
                                        splitLine = lineIn.Split(sepChars);
                                        var indexEnd = splitLine.Length - 1;

                                        for (var index = 0; index <= indexEnd; index++)
                                        {
                                            // Step through the split line and add any numbers to the charge list
                                            // Typically, splitLine[1] will contain "and"
                                            if (IsNumber(splitLine[index].Trim()))
                                            {
                                                if (mCurrentSpectrum.ParentIonChargeCount < SpectrumInfoMsMsText.MAX_CHARGE_COUNT)
                                                {
                                                    mCurrentSpectrum.ParentIonCharges[mCurrentSpectrum.ParentIonChargeCount] =
                                                        int.Parse(splitLine[index].Trim());

                                                    mCurrentSpectrum.ParentIonChargeCount++;
                                                }
                                            }
                                        }
                                    }
                                    else if (IsNumber(lineIn))
                                    {
                                        mCurrentSpectrum.ParentIonChargeCount = 1;
                                        mCurrentSpectrum.ParentIonCharges[0]  = int.Parse(lineIn);
                                    }
                                }
                                else if (lineIn.StartsWith(LINE_START_TITLE, StringComparison.OrdinalIgnoreCase))
                                {
                                    // NOTE(review): SpectrumTitle keeps the title prefix while SpectrumTitleWithCommentChars
                                    // receives the text with the prefix removed; confirm this assignment order is intended
                                    mCurrentSpectrum.SpectrumTitle = lineIn;
                                    lineIn = lineIn.Substring(LINE_START_TITLE.Length).Trim();
                                    mCurrentSpectrum.SpectrumTitleWithCommentChars = lineIn;

                                    if (!scanNumberFound)
                                    {
                                        // We didn't find a scan number in a ### MSMS: comment line
                                        // Attempt to extract out the scan numbers from the Title
                                        ExtractScanInfoFromDtaHeader(lineIn, out var scanNumberStart, out var scanNumberEnd, out var scanCount);
                                        mCurrentSpectrum.ScanNumber    = scanNumberStart;
                                        mCurrentSpectrum.ScanNumberEnd = scanNumberEnd;
                                        mCurrentSpectrum.ScanCount     = scanCount;
                                    }
                                }
                                else if (lineIn.StartsWith(LINE_START_END_IONS, StringComparison.OrdinalIgnoreCase))
                                {
                                    // Empty ion list
                                    break;
                                }
                                else if (lineIn.StartsWith(LINE_START_RT, StringComparison.OrdinalIgnoreCase))
                                {
                                    lineIn = lineIn.Substring(LINE_START_RT.Length).Trim();

                                    // The value is divided by 60 to convert seconds to minutes
                                    if (double.TryParse(lineIn, out var rtSeconds))
                                    {
                                        mCurrentSpectrum.RetentionTimeMin = (float)(rtSeconds / 60.0d);
                                    }
                                }
                                else if (lineIn.StartsWith(LINE_START_SCANS, StringComparison.OrdinalIgnoreCase))
                                {
                                    lineIn          = lineIn.Substring(LINE_START_SCANS.Length).Trim();
                                    scanNumberFound = ExtractScanRange(lineIn, mCurrentSpectrum);
                                }
                                else if (char.IsNumber(lineIn, 0))
                                {
                                    // Found the start of the ion list
                                    // Add to the MsMs data list
                                    if (parentIonFound)
                                    {
                                        mCurrentMsMsDataList.Add(lineIn);
                                    }

                                    break;
                                }
                            }

                            if (parentIonFound && mCurrentMsMsDataList.Count > 0)
                            {
                                // We have determined the parent ion

                                // Note: MGF files have Parent Ion MZ defined but not Parent Ion MH
                                // Thus, compute .ParentIonMH using .ParentIonMZ
                                if (mCurrentSpectrum.ParentIonChargeCount >= 1)
                                {
                                    mCurrentSpectrum.ParentIonMH = ConvoluteMass(mCurrentSpectrum.ParentIonMZ, mCurrentSpectrum.ParentIonCharges[0], 1);
                                }
                                else
                                {
                                    // No charge information; use the m/z value as-is
                                    mCurrentSpectrum.ParentIonMH = mCurrentSpectrum.ParentIonMZ;
                                }

                                // Read in the ions and populate mCurrentMsMsDataList
                                // Read all of the MS/MS spectrum ions up to LINE_START_END_IONS (blank lines are skipped)
                                while (mFileReader.Peek() > -1)
                                {
                                    lineIn = mFileReader.ReadLine();
                                    mInFileLineNumber++;

                                    // See if lineIn is blank
                                    if (lineIn != null)
                                    {
                                        mTotalBytesRead += lineIn.Length + 2;
                                        AddNewRecentFileText(lineIn);

                                        if (lineIn.Trim().Length > 0)
                                        {
                                            if (lineIn.Trim().StartsWith(LINE_START_END_IONS, StringComparison.OrdinalIgnoreCase))
                                            {
                                                break;
                                            }

                                            // Add to MS/MS data string list
                                            mCurrentMsMsDataList.Add(lineIn.Trim());
                                        }
                                    }

                                    if (mInFileLineNumber - lastProgressUpdateLine >= 250)
                                    {
                                        lastProgressUpdateLine = mInFileLineNumber;
                                        UpdateStreamReaderProgress();
                                    }
                                }

                                spectrumFound = true;

                                mCurrentSpectrum.ClearMzAndIntensityData();

                                if (ReadTextDataOnly)
                                {
                                    // Do not parse the text data to populate .MZList and .IntensityList
                                    mCurrentSpectrum.PeaksCount = 0;
                                }
                                else
                                {
                                    try
                                    {
                                        ParseMsMsDataList(mCurrentMsMsDataList, out var mzList, out var intensityList);

                                        mCurrentSpectrum.PeaksCount = mzList.Count;
                                        mCurrentSpectrum.StoreIons(mzList, intensityList);

                                        mCurrentSpectrum.Validate(true, true);
                                    }
                                    catch (Exception ex)
                                    {
                                        // Report the problem instead of silently discarding it,
                                        // then skip this spectrum and continue with the next one
                                        OnErrorEvent("Error parsing the MS/MS data in ReadNextSpectrum", ex);
                                        mCurrentSpectrum.PeaksCount = 0;
                                        spectrumFound = false;
                                    }
                                }
                            }

                            // Copy the scan number to mScanNumberStartSaved
                            if (mCurrentSpectrum.ScanNumber > 0)
                            {
                                mScanNumberStartSaved = mCurrentSpectrum.ScanNumber;
                            }
                        }
                    }

                    // Only update progress every 250 lines, or when a spectrum has been found
                    if (mInFileLineNumber - lastProgressUpdateLine < 250 && !spectrumFound)
                    {
                        continue;
                    }

                    lastProgressUpdateLine = mInFileLineNumber;

                    if (mFileReader is StreamReader streamReader)
                    {
                        // Percent complete based on the position in the underlying stream
                        UpdateProgress(streamReader.BaseStream.Position / (double)streamReader.BaseStream.Length * 100.0d);
                    }
                    else if (mInFileStreamLength > 0L)
                    {
                        // Percent complete based on the running total of bytes read
                        UpdateProgress(mTotalBytesRead / (double)mInFileStreamLength * 100.0d);
                    }
                }

                spectrumInfo = mCurrentSpectrum;

                if (spectrumFound)
                {
                    mScanCountRead++;

                    if (!ReadingAndStoringSpectra)
                    {
                        if (mInputFileStats.ScanCount < mScanCountRead)
                        {
                            mInputFileStats.ScanCount = mScanCountRead;
                        }

                        UpdateFileStats(mInputFileStats.ScanCount, spectrumInfo.ScanNumber, false);
                    }
                }
            }
            catch (Exception ex)
            {
                OnErrorEvent("Error in ReadNextSpectrum", ex);
                spectrumInfo = new SpectrumInfoMsMsText();

                // The spectrum is replaced by an empty object, so do not report it as found
                spectrumFound = false;
            }

            return spectrumFound;
        }