private bool CompareSpectraBinData(
            clsCorrelation dataComparer,
            clsMSSpectrum fragSpectrum,
            clsBinnedData binnedSpectrum)
        {
            // Bins the ions of fragSpectrum into binnedSpectrum, using the bin start and bin size defined by dataComparer
            // Ions whose m/z falls in the reporter ion ignore range (as judged by clsUtilities.CheckPointInMZIgnoreRange) are left out
            // Returns the success flag reported by dataComparer.BinData

            var ionCount = fragSpectrum.IonCount;

            var mzValues    = new List<float>(ionCount);
            var intensities = new List<float>(ionCount);

            for (var ionIndex = 0; ionIndex < ionCount; ionIndex++)
            {
                var mz = fragSpectrum.IonsMZ[ionIndex];

                var inIgnoreRange = clsUtilities.CheckPointInMZIgnoreRange(
                    mz,
                    mReporterIons.MZIntensityFilterIgnoreRangeStart,
                    mReporterIons.MZIntensityFilterIgnoreRangeEnd);

                if (inIgnoreRange)
                {
                    // Reporter ion region; do not copy this data point
                    continue;
                }

                mzValues.Add((float)mz);
                intensities.Add((float)fragSpectrum.IonsIntensity[ionIndex]);
            }

            // The binned spectrum must describe the same bins that dataComparer will use
            binnedSpectrum.BinnedDataStartX = dataComparer.BinStartX;
            binnedSpectrum.BinSize          = dataComparer.BinSize;

            // The copied data is expected to have already been filtered to discard data points below the noise threshold intensity
            return dataComparer.BinData(
                mzValues,
                intensities,
                binnedSpectrum.BinnedIntensities,
                binnedSpectrum.BinnedIntensitiesOffset);
        }
        /// <summary>
        /// Compare two fragmentation spectra by binning each one and correlating the binned intensities (Pearson correlation)
        /// </summary>
        /// <param name="fragSpectrum1">First spectrum to compare</param>
        /// <param name="fragSpectrum2">Second spectrum to compare</param>
        /// <param name="binningOptions">Binning options used to construct the clsCorrelation instance</param>
        /// <param name="considerOffsetBinnedData">
        /// When true, the offset binned data is also correlated and the larger of the two scores is returned;
        /// when false, only the score for the standard binned data is returned
        /// </param>
        /// <returns>
        /// Similarity score; per the original notes this ranges from 0 (no similarity) to 1 (perfect match).
        /// Returns -1 if either spectrum could not be binned or if an exception is caught.
        /// </returns>
        private float CompareSpectra(
            clsMSSpectrum fragSpectrum1,
            clsMSSpectrum fragSpectrum2,
            clsBinningOptions binningOptions,
            bool considerOffsetBinnedData = true)
        {
            const float ERROR_SCORE = -1;
            const clsCorrelation.cmCorrelationMethodConstants correlationMethod = clsCorrelation.cmCorrelationMethodConstants.Pearson;

            var binnedSpectrum1 = new clsBinnedData();
            var binnedSpectrum2 = new clsBinnedData();

            try
            {
                var dataComparer = new clsCorrelation(binningOptions);
                RegisterEvents(dataComparer);

                // Bin the first spectrum, then the second; stop at the first failure
                if (!CompareSpectraBinData(dataComparer, fragSpectrum1, binnedSpectrum1) ||
                    !CompareSpectraBinData(dataComparer, fragSpectrum2, binnedSpectrum2))
                {
                    return ERROR_SCORE;
                }

                // Correlate the standard binned data
                // The original notes state that the score will be 0 if either instance of BinnedIntensities has fewer than 5 data points
                var standardScore = dataComparer.Correlate(
                    binnedSpectrum1.BinnedIntensities,
                    binnedSpectrum2.BinnedIntensities,
                    correlationMethod);

                if (considerOffsetBinnedData)
                {
                    // Also correlate the offset binned data and report the better of the two scores
                    var offsetScore = dataComparer.Correlate(
                        binnedSpectrum1.BinnedIntensitiesOffset,
                        binnedSpectrum2.BinnedIntensitiesOffset,
                        correlationMethod);

                    return Math.Max(standardScore, offsetScore);
                }

                return standardScore;
            }
            catch (Exception ex)
            {
                ReportError("CompareSpectra: " + ex.Message, ex);
                return ERROR_SCORE;
            }
        }
Example #3
0
        /// <summary>
        /// When returnMax is false, determine the sum of the data within the search mass tolerance
        /// When returnMax is true, determine the maximum of the data within the search mass tolerance
        /// </summary>
        /// <param name="msSpectrum">Spectrum whose IonsMZ and IonsIntensity lists are examined</param>
        /// <param name="searchMZ">m/z value to search for</param>
        /// <param name="searchToleranceHalfWidth">Tolerance passed to SumIonsFindValueInRange along with searchMZ to locate the matching index range</param>
        /// <param name="ionMatchCount">Output: number of data points in the matching index range; 0 if no matches or if an error occurs</param>
        /// <param name="closestMZ">Output: m/z of the matching data point closest to searchMZ; 0 if no matches or if an error occurs</param>
        /// <param name="returnMax">True to return the maximum intensity in the range, false to return the summed intensity</param>
        /// <returns>The sum or maximum of the matching data; 0 if no matches or if an error occurs</returns>
        /// <remarks>
        /// The matching index range is located by SumIonsFindValueInRange; per the original notes,
        /// that method performs a recursive search of msSpectrum.IonsMZ and is therefore efficient regardless of the number of data points.
        /// For sparse spectra, you can alternatively use FindMaxValueInMZRange
        /// </remarks>
        public double AggregateIonsInRange(
            clsMSSpectrum msSpectrum,
            double searchMZ,
            double searchToleranceHalfWidth,
            out int ionMatchCount,
            out double closestMZ,
            bool returnMax)
        {
            ionMatchCount = 0;
            closestMZ     = 0;

            try
            {
                if (msSpectrum.IonsMZ == null || msSpectrum.IonCount <= 0)
                {
                    return 0;
                }

                if (!SumIonsFindValueInRange(msSpectrum.IonsMZ, searchMZ, searchToleranceHalfWidth, out var indexFirst, out var indexLast))
                {
                    return 0;
                }

                // Accumulate into locals so that a failure part way through the loop cannot leak a partial result to the caller
                double ionSumOrMax        = 0;
                double nearestMZ          = 0;
                var    smallestDifference = double.MaxValue;

                for (var ionIndex = indexFirst; ionIndex <= indexLast; ionIndex++)
                {
                    var intensity = msSpectrum.IonsIntensity[ionIndex];

                    if (returnMax)
                    {
                        // Track the maximum; since the starting value is 0, non-positive intensities never replace it
                        if (intensity > ionSumOrMax)
                        {
                            ionSumOrMax = intensity;
                        }
                    }
                    else
                    {
                        // Track the sum
                        ionSumOrMax += intensity;
                    }

                    var testDifference = Math.Abs(msSpectrum.IonsMZ[ionIndex] - searchMZ);
                    if (testDifference < smallestDifference)
                    {
                        smallestDifference = testDifference;
                        nearestMZ          = msSpectrum.IonsMZ[ionIndex];
                    }
                }

                // Every point was processed successfully; publish the results
                ionMatchCount = indexLast - indexFirst + 1;
                closestMZ     = nearestMZ;

                return ionSumOrMax;
            }
            catch (Exception)
            {
                // Best-effort lookup: report "no matches" rather than throwing.
                // All three outputs are reset so the caller never sees a partial sum/max paired with a match count of 0
                ionMatchCount = 0;
                closestMZ     = 0;
                return 0;
            }
        }
Example #4
0
        /// <summary>
        /// Replace the ion data of this spectrum with a copy of the data in another spectrum
        /// </summary>
        /// <param name="spectrum">Spectrum whose IonsMZ and IonsIntensity values are copied into this instance</param>
        /// <param name="scanNumberOverride">Scan number to assign to this instance (always used in place of spectrum.ScanNumber)</param>
        /// <remarks>
        /// If a source list is the same list object as the corresponding list of this instance
        /// (for example when a spectrum is passed to its own ReplaceData method), that list already
        /// holds the desired data and is left in place instead of being cleared.
        /// </remarks>
        public void ReplaceData(clsMSSpectrum spectrum, int scanNumberOverride)
        {
            // Dereference the source before changing any state, so that a null spectrum fails without leaving this instance half-updated
            var sourceMZ        = spectrum.IonsMZ;
            var sourceIntensity = spectrum.IonsIntensity;

            // The override always wins; there is no need to assign spectrum.ScanNumber first
            ScanNumber = scanNumberOverride;

            // Clearing a list that is also the source would discard the very data we are about to copy
            var sharedMZ        = ReferenceEquals(sourceMZ, IonsMZ);
            var sharedIntensity = ReferenceEquals(sourceIntensity, IonsIntensity);

            if (!sharedMZ)
            {
                IonsMZ.Clear();
            }

            if (!sharedIntensity)
            {
                IonsIntensity.Clear();
            }

            // Release memory when the current allocation is more than twice the size required
            if (IonsMZ.Capacity / 2 > sourceMZ.Count)
            {
                IonsMZ.Capacity        = sourceMZ.Count;
                IonsIntensity.Capacity = sourceIntensity.Count;
            }

            if (!sharedMZ)
            {
                IonsMZ.AddRange(sourceMZ);
            }

            if (!sharedIntensity)
            {
                IonsIntensity.AddRange(sourceIntensity);
            }
        }
Example #5
0
        /// <summary>
        /// Determine the baseline noise stats for a spectrum and store them in scanInfo.BaselineNoiseStats
        /// </summary>
        /// <param name="scanInfo">Scan whose BaselineNoiseStats is updated</param>
        /// <param name="msSpectrum">Spectrum whose intensities are examined when the noise mode is not AbsoluteThreshold</param>
        /// <param name="noiseThresholdOptions">Noise options; BaselineNoiseMode selects between the absolute threshold and the computed noise level</param>
        private void ComputeNoiseLevelForMassSpectrum(
            clsScanInfo scanInfo,
            clsMSSpectrum msSpectrum,
            clsBaselineNoiseOptions noiseThresholdOptions)
        {
            const bool IGNORE_NON_POSITIVE_DATA = true;

            var noiseMode = noiseThresholdOptions.BaselineNoiseMode;

            // Start from freshly initialized stats; these remain in place if the spectrum has no ions
            scanInfo.BaselineNoiseStats = clsMASICPeakFinder.InitializeBaselineNoiseStats(0, noiseMode);

            if (noiseMode == clsMASICPeakFinder.eNoiseThresholdModes.AbsoluteThreshold)
            {
                // The noise level is user-defined; no need to examine the data
                scanInfo.BaselineNoiseStats.NoiseLevel = noiseThresholdOptions.BaselineNoiseLevelAbsolute;
                scanInfo.BaselineNoiseStats.PointsUsed = 1;
                return;
            }

            if (msSpectrum.IonCount <= 0)
            {
                return;
            }

            // Compute the noise level using every data point in the spectrum
            var lastIonIndex = msSpectrum.IonCount - 1;

            mPeakFinder.ComputeTrimmedNoiseLevel(
                msSpectrum.IonsIntensity, 0, lastIonIndex,
                noiseThresholdOptions, IGNORE_NON_POSITIVE_DATA,
                out var computedNoiseStats);

            scanInfo.BaselineNoiseStats = computedNoiseStats;
        }
Example #6
0
        /// <summary>
        /// Construct a new spectrum from the scan number and ion lists of an existing spectrum
        /// </summary>
        /// <param name="sourceSpectrum">Spectrum to copy</param>
        /// <returns>New clsMSSpectrum instance built from sourceSpectrum's ScanNumber, IonsMZ, IonsIntensity, and ion count</returns>
        /// <remarks>
        /// NOTE(review): whether the m/z and intensity lists are duplicated or shared is decided by the clsMSSpectrum constructor, which is not visible here
        /// </remarks>
        public clsMSSpectrum Copy(clsMSSpectrum sourceSpectrum)
        {
            return new clsMSSpectrum(
                sourceSpectrum.ScanNumber,
                sourceSpectrum.IonsMZ,
                sourceSpectrum.IonsIntensity,
                sourceSpectrum.IonsMZ.Count);
        }
Example #7
0
        /// <summary>
        /// Compute the noise level for a spectrum, optionally filter and compress its data, then add it to the spectra cache
        /// </summary>
        /// <param name="scanInfo">Scan metadata; BaselineNoiseStats and IonCount are updated</param>
        /// <param name="dataImportUtilities">Provides DiscardDataBelowNoiseThreshold and DiscardDataToLimitIonCount</param>
        /// <param name="spectraCache">Cache that receives the spectrum via AddSpectrumToPool</param>
        /// <param name="msSpectrum">Spectrum to process; modified in place by the filtering and compression steps</param>
        /// <param name="noiseThresholdOptions">Options used for the noise level computation and the low intensity filter</param>
        /// <param name="discardLowIntensityData">When true (and the scan is not an MRM scan), data below the noise threshold is discarded</param>
        /// <param name="compressData">When true, CompressSpectraData is called on the spectrum</param>
        /// <param name="msDataResolution">Resolution passed to CompressSpectraData</param>
        /// <param name="keepRawSpectrum">When false, only the noise level is computed; the spectrum is not filtered or cached</param>
        /// <returns>
        /// True if keepRawSpectrum is false; otherwise the value returned by AddSpectrumToPool.
        /// False if an exception is caught (the error is reported via ReportError)
        /// </returns>
        public bool ProcessAndStoreSpectrum(
            clsScanInfo scanInfo,
            DataInput.clsDataImport dataImportUtilities,
            clsSpectraCache spectraCache,
            clsMSSpectrum msSpectrum,
            clsBaselineNoiseOptions noiseThresholdOptions,
            bool discardLowIntensityData,
            bool compressData,
            double msDataResolution,
            bool keepRawSpectrum)
        {
            // Updated before each major step so that the error message can pinpoint where a failure occurred
            var lastKnownLocation = "Start";

            try
            {
                // The noise threshold intensity for this spectrum is stored in scanInfo.BaselineNoiseStats
                lastKnownLocation = "Call ComputeNoiseLevelForMassSpectrum";
                ComputeNoiseLevelForMassSpectrum(scanInfo, msSpectrum, noiseThresholdOptions);

                if (!keepRawSpectrum)
                {
                    return true;
                }

                // Low intensity data is never discarded for MRM scans
                var filterLowIntensityData =
                    discardLowIntensityData &&
                    scanInfo.MRMScanType == ThermoRawFileReader.MRMScanTypeConstants.NotMRM;

                if (filterLowIntensityData)
                {
                    // Remove data below the noise level or below the minimum S/N level
                    // The reporter ion m/z region is passed along so that ions in that region are not discarded
                    lastKnownLocation = "Call DiscardDataBelowNoiseThreshold";
                    dataImportUtilities.DiscardDataBelowNoiseThreshold(
                        msSpectrum,
                        scanInfo.BaselineNoiseStats.NoiseLevel,
                        mReporterIons.MZIntensityFilterIgnoreRangeStart,
                        mReporterIons.MZIntensityFilterIgnoreRangeEnd,
                        noiseThresholdOptions);

                    scanInfo.IonCount = msSpectrum.IonCount;
                }

                if (compressData)
                {
                    lastKnownLocation = "Call CompressSpectraData";

                    // As above, the reporter ion m/z region is excluded from compression
                    CompressSpectraData(
                        msSpectrum,
                        msDataResolution,
                        mReporterIons.MZIntensityFilterIgnoreRangeStart,
                        mReporterIons.MZIntensityFilterIgnoreRangeEnd);
                }

                var ionCountBeforeTrim = msSpectrum.IonCount;

                if (ionCountBeforeTrim > MAX_ALLOWABLE_ION_COUNT)
                {
                    // Do not keep more than MAX_ALLOWABLE_ION_COUNT ions
                    lastKnownLocation = "Call DiscardDataToLimitIonCount";
                    mSpectraFoundExceedingMaxIonCount++;

                    // Notify the user the first 10 times a spectrum exceeds the limit,
                    // and again whenever a spectrum sets a new record for the largest ion count
                    var showMessage =
                        mSpectraFoundExceedingMaxIonCount <= 10 ||
                        ionCountBeforeTrim > mMaxIonCountReported;

                    if (showMessage)
                    {
                        Console.WriteLine();
                        Console.WriteLine(
                            $"Note: Scan {scanInfo.ScanNumber} has {ionCountBeforeTrim} ions; " +
                            $"will only retain {MAX_ALLOWABLE_ION_COUNT} (trimmed " +
                            $"{mSpectraFoundExceedingMaxIonCount} spectra)");

                        mMaxIonCountReported = ionCountBeforeTrim;
                    }

                    dataImportUtilities.DiscardDataToLimitIonCount(
                        msSpectrum,
                        mReporterIons.MZIntensityFilterIgnoreRangeStart,
                        mReporterIons.MZIntensityFilterIgnoreRangeEnd,
                        MAX_ALLOWABLE_ION_COUNT);

                    scanInfo.IonCount = msSpectrum.IonCount;
                }

                lastKnownLocation = "Call AddSpectrumToPool";
                return spectraCache.AddSpectrumToPool(msSpectrum, scanInfo.ScanNumber);
            }
            catch (Exception ex)
            {
                ReportError(
                    $"Error in ProcessAndStoreSpectrum (LastKnownLocation: {lastKnownLocation})",
                    ex,
                    clsMASIC.eMasicErrorCodes.InputFileDataReadError);

                return false;
            }
        }
Example #8
0
        /// <summary>
        /// Compress the data in a spectrum, in place, using two passes:
        /// first collapse runs of zero-intensity points, then merge points that are close together in m/z
        /// </summary>
        /// <param name="msSpectrum">Spectrum to compress; IonsMZ and IonsIntensity are overwritten in place and then shortened via ShrinkArrays</param>
        /// <param name="msDataResolution">Points less than this many m/z units above the first point of a group are merged into that point</param>
        /// <param name="mzIgnoreRangeStart">Start of the m/z range in which points are never merged</param>
        /// <param name="mzIgnoreRangeEnd">End of the m/z range in which points are never merged</param>
        private void CompressSpectraData(
            clsMSSpectrum msSpectrum,
            double msDataResolution,
            double mzIgnoreRangeStart,
            double mzIgnoreRangeEnd)
        {
            // First, look for blocks of data points that consecutively have an intensity value of 0
            // For each block of three or more such points, reduce the data to only retain the first data point and last data point in the block
            //
            // Next, look for data points in msSpectrum that are within msDataResolution units of one another (m/z units)
            // If found, combine into just one data point, keeping the largest intensity and the m/z value corresponding to the largest intensity

            // Nothing to compress if the spectrum has 0 or 1 data points
            if (msSpectrum.IonCount <= 1)
            {
                return;
            }

            // Look for blocks of data points that all have an intensity value of 0
            // index is the read position and targetIndex is the write position; both refer to the same lists
            var targetIndex = 0;
            var index       = 0;

            while (index < msSpectrum.IonCount)
            {
                // Any intensity below float.Epsilon (zero, and also any negative value) is treated as zero
                if (msSpectrum.IonsIntensity[index] < float.Epsilon)
                {
                    // Count the zero-intensity points that immediately follow the point at index
                    var countCombined = 0;
                    for (var comparisonIndex = index + 1; comparisonIndex < msSpectrum.IonCount; comparisonIndex++)
                    {
                        if (msSpectrum.IonsIntensity[comparisonIndex] < float.Epsilon)
                        {
                            countCombined += 1;
                        }
                        else
                        {
                            break;
                        }
                    }

                    // countCombined > 1 means the block has at least three points, so at least one interior point can be dropped
                    // A block of just two zeros (countCombined == 1) is handled as two single zeros on successive iterations
                    if (countCombined > 1)
                    {
                        // Only keep the first and last data point in the block

                        msSpectrum.IonsMZ[targetIndex]        = msSpectrum.IonsMZ[index];
                        msSpectrum.IonsIntensity[targetIndex] = msSpectrum.IonsIntensity[index];

                        // Advance the write position to store the last point of the block
                        targetIndex += 1;
                        msSpectrum.IonsMZ[targetIndex]        = msSpectrum.IonsMZ[index + countCombined];
                        msSpectrum.IonsIntensity[targetIndex] = msSpectrum.IonsIntensity[index + countCombined];

                        // Skip past the block; the increments at the bottom of the loop move both positions to the next point
                        index += countCombined;
                    }
                    // Keep this zero-intensity point as-is (it is not the start of a block of three or more)
                    // Copying is only required once the write position has fallen behind the read position
                    else if (targetIndex != index)
                    {
                        msSpectrum.IonsMZ[targetIndex]        = msSpectrum.IonsMZ[index];
                        msSpectrum.IonsIntensity[targetIndex] = msSpectrum.IonsIntensity[index];
                    }
                }

                // Note: targetIndex will be the same as index until the first time that a block of zeros is collapsed (countCombined > 1)
                // After that, targetIndex will always be less than index and we will thus always need to copy data
                else if (targetIndex != index)
                {
                    msSpectrum.IonsMZ[targetIndex]        = msSpectrum.IonsMZ[index];
                    msSpectrum.IonsIntensity[targetIndex] = msSpectrum.IonsIntensity[index];
                }

                index       += 1;
                targetIndex += 1;
            }

            // targetIndex is now the number of data points retained
            // Update .IonCount with the new data count (presumably ShrinkArrays also truncates the lists; confirm against clsMSSpectrum.ShrinkArrays)
            msSpectrum.ShrinkArrays(targetIndex);

            // Step through the data, consolidating data within msDataResolution
            // Note that we're copying in place rather than making a new, duplicate array
            // If the m/z value is between mzIgnoreRangeStart and mzIgnoreRangeEnd, then we will not compress the data
            // NOTE(review): the m/z difference test below is not an absolute value, so this appears to assume IonsMZ is sorted ascending; confirm

            targetIndex = 0;
            index       = 0;

            while (index < msSpectrum.IonCount)
            {
                // countCombined is the number of following points merged into the point at index
                // bestMz is the m/z of the most intense point in the group
                var countCombined = 0;
                var bestMz        = msSpectrum.IonsMZ[index];

                // Only combine data if the first data point has a positive intensity value
                if (msSpectrum.IonsIntensity[index] > 0)
                {
                    var pointInIgnoreRange = clsUtilities.CheckPointInMZIgnoreRange(msSpectrum.IonsMZ[index], mzIgnoreRangeStart, mzIgnoreRangeEnd);

                    if (!pointInIgnoreRange)
                    {
                        for (var comparisonIndex = index + 1; comparisonIndex < msSpectrum.IonCount; comparisonIndex++)
                        {
                            if (clsUtilities.CheckPointInMZIgnoreRange(msSpectrum.IonsMZ[comparisonIndex], mzIgnoreRangeStart, mzIgnoreRangeEnd))
                            {
                                // Reached the ignore range; do not allow to be combined with the current data point
                                break;
                            }

                            // The distance is measured from the first point of the group (index), not from the previous point
                            if (msSpectrum.IonsMZ[comparisonIndex] - msSpectrum.IonsMZ[index] < msDataResolution)
                            {
                                if (msSpectrum.IonsIntensity[comparisonIndex] > msSpectrum.IonsIntensity[index])
                                {
                                    // Store the larger intensity at index (overwriting the original value) and remember its m/z
                                    msSpectrum.IonsIntensity[index] = msSpectrum.IonsIntensity[comparisonIndex];
                                    bestMz = msSpectrum.IonsMZ[comparisonIndex];
                                }

                                countCombined += 1;
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                }

                // Note: targetIndex will be the same as index until the first time that data is combined (countCombined > 0)
                // After that, targetIndex will always be less than index and we will thus always need to copy data
                if (targetIndex != index || countCombined > 0)
                {
                    // IonsIntensity[index] already holds the largest intensity of the group
                    msSpectrum.IonsMZ[targetIndex]        = bestMz;
                    msSpectrum.IonsIntensity[targetIndex] = msSpectrum.IonsIntensity[index];

                    // Skip the points that were merged into this one
                    index += countCombined;
                }

                index       += 1;
                targetIndex += 1;
            }

            // targetIndex is now the number of data points retained
            // Update .IonCount with the new data count (presumably ShrinkArrays also truncates the lists; confirm against clsMSSpectrum.ShrinkArrays)
            msSpectrum.ShrinkArrays(targetIndex);
        }