/// <summary>
/// Copy the ions of a fragmentation spectrum (skipping any point that falls in the reporter ion
/// m/z ignore range) and bin them using the settings of the given correlation object
/// </summary>
/// <param name="dataComparer">Correlation object that supplies the bin start, the bin size, and the binning routine</param>
/// <param name="fragSpectrum">Spectrum whose m/z and intensity values are binned</param>
/// <param name="binnedSpectrum">Receives the bin start, the bin size, and the binned intensities (standard and offset)</param>
/// <returns>The value returned by dataComparer.BinData</returns>
private bool CompareSpectraBinData(
    clsCorrelation dataComparer,
    clsMSSpectrum fragSpectrum,
    clsBinnedData binnedSpectrum)
{
    var mzValues = new List<float>(fragSpectrum.IonCount);
    var intensityValues = new List<float>(fragSpectrum.IonCount);

    // Gather the data points to bin, leaving out reporter ion data
    for (var ionIndex = 0; ionIndex < fragSpectrum.IonCount; ionIndex++)
    {
        var isReporterIonRegion = clsUtilities.CheckPointInMZIgnoreRange(
            fragSpectrum.IonsMZ[ionIndex],
            mReporterIons.MZIntensityFilterIgnoreRangeStart,
            mReporterIons.MZIntensityFilterIgnoreRangeEnd);

        if (isReporterIonRegion)
        {
            continue;
        }

        mzValues.Add((float)fragSpectrum.IonsMZ[ionIndex]);
        intensityValues.Add((float)fragSpectrum.IonsIntensity[ionIndex]);
    }

    binnedSpectrum.BinnedDataStartX = dataComparer.BinStartX;
    binnedSpectrum.BinSize = dataComparer.BinSize;

    // The original author notes that the data is expected to have already been filtered
    // to discard points below the noise threshold intensity before reaching this method
    return dataComparer.BinData(
        mzValues,
        intensityValues,
        binnedSpectrum.BinnedIntensities,
        binnedSpectrum.BinnedIntensitiesOffset);
}
/// <summary>
/// Bin two spectra and compute a similarity score between them using Pearson correlation
/// </summary>
/// <param name="fragSpectrum1">First spectrum to compare</param>
/// <param name="fragSpectrum2">Second spectrum to compare</param>
/// <param name="binningOptions">Options used to construct the correlation object that bins and correlates the data</param>
/// <param name="considerOffsetBinnedData">
/// When true, the offset binned data is also correlated and the larger of the two scores is returned;
/// when false, only the score for the standard binned data is returned
/// </param>
/// <returns>
/// Similarity score (described by the original author as ranging from 0 for no similarity to 1 for a perfect match);
/// -1 if binning fails or an exception occurs
/// </returns>
private float CompareSpectra(
    clsMSSpectrum fragSpectrum1,
    clsMSSpectrum fragSpectrum2,
    clsBinningOptions binningOptions,
    bool considerOffsetBinnedData = true)
{
    var firstBinned = new clsBinnedData();
    var secondBinned = new clsBinnedData();

    try
    {
        var correlator = new clsCorrelation(binningOptions);
        RegisterEvents(correlator);

        const clsCorrelation.cmCorrelationMethodConstants correlationMethod = clsCorrelation.cmCorrelationMethodConstants.Pearson;

        // Bin the first spectrum, then the second; the second is only binned when the first succeeded
        if (!CompareSpectraBinData(correlator, fragSpectrum1, firstBinned) ||
            !CompareSpectraBinData(correlator, fragSpectrum2, secondBinned))
        {
            return -1;
        }

        // Correlate the standard binned data
        // Original author's note: the similarity is 0 if either set of binned intensities has fewer than 5 data points
        var standardScore = correlator.Correlate(
            firstBinned.BinnedIntensities,
            secondBinned.BinnedIntensities,
            correlationMethod);

        if (considerOffsetBinnedData)
        {
            // Also correlate the offset binned data and report the better of the two scores
            var offsetScore = correlator.Correlate(
                firstBinned.BinnedIntensitiesOffset,
                secondBinned.BinnedIntensitiesOffset,
                correlationMethod);

            return Math.Max(standardScore, offsetScore);
        }

        return standardScore;
    }
    catch (Exception ex)
    {
        ReportError("CompareSpectra: " + ex.Message, ex);
        return -1;
    }
}
/// <summary>
/// Aggregate the intensities of the ions whose m/z values lie within the search tolerance of searchMZ
/// When returnMax is false, determine the sum of the matching intensities
/// When returnMax is true, determine the maximum of the matching intensities
/// </summary>
/// <param name="msSpectrum">Spectrum to search</param>
/// <param name="searchMZ">m/z value to search for</param>
/// <param name="searchToleranceHalfWidth">Search tolerance, passed unchanged to SumIonsFindValueInRange</param>
/// <param name="ionMatchCount">Output: number of data points in the matching index range; 0 if no matches or an error occurs</param>
/// <param name="closestMZ">Output: m/z of the matching data point closest to searchMZ; 0 if no matches or an error occurs</param>
/// <param name="returnMax">True to return the maximum intensity; false to return the summed intensity</param>
/// <returns>The sum or maximum of the matching data; 0 if no matches or an error occurs</returns>
/// <remarks>
/// The index range of the matching data points is found by SumIonsFindValueInRange
/// (described by the original author as a recursive search of msSpectrum.IonsMZ that is
/// efficient regardless of the number of data points in the spectrum)
/// For sparse spectra, you can alternatively use FindMaxValueInMZRange
/// </remarks>
public double AggregateIonsInRange(
    clsMSSpectrum msSpectrum,
    double searchMZ,
    double searchToleranceHalfWidth,
    out int ionMatchCount,
    out double closestMZ,
    bool returnMax)
{
    ionMatchCount = 0;
    closestMZ = 0;

    try
    {
        if (msSpectrum?.IonsMZ == null || msSpectrum.IonCount <= 0)
        {
            return 0;
        }

        if (!SumIonsFindValueInRange(msSpectrum.IonsMZ, searchMZ, searchToleranceHalfWidth, out var indexFirst, out var indexLast))
        {
            return 0;
        }

        double ionSumOrMax = 0;
        var smallestDifference = double.MaxValue;

        for (var ionIndex = indexFirst; ionIndex <= indexLast; ionIndex++)
        {
            if (returnMax)
            {
                // Track the largest intensity seen so far
                if (msSpectrum.IonsIntensity[ionIndex] > ionSumOrMax)
                {
                    ionSumOrMax = msSpectrum.IonsIntensity[ionIndex];
                }
            }
            else
            {
                // Accumulate the intensities
                ionSumOrMax += msSpectrum.IonsIntensity[ionIndex];
            }

            // Track the m/z of the data point closest to the search m/z
            var testDifference = Math.Abs(msSpectrum.IonsMZ[ionIndex] - searchMZ);
            if (testDifference < smallestDifference)
            {
                smallestDifference = testDifference;
                closestMZ = msSpectrum.IonsMZ[ionIndex];
            }
        }

        ionMatchCount = indexLast - indexFirst + 1;
        return ionSumOrMax;
    }
    catch (Exception)
    {
        // Best-effort lookup: errors are deliberately not propagated.
        // Reset every result so that a failure part way through the loop cannot
        // report a partial sum (or a partial closestMZ) alongside a match count of 0.
        ionMatchCount = 0;
        closestMZ = 0;
        return 0;
    }
}
/// <summary>
/// Replace the ions stored in this object with a copy of the ions in another spectrum
/// </summary>
/// <param name="spectrum">Spectrum whose m/z and intensity values are copied</param>
/// <param name="scanNumberOverride">Scan number to store in this object; always takes precedence over spectrum.ScanNumber</param>
public void ReplaceData(clsMSSpectrum spectrum, int scanNumberOverride)
{
    // Read the source lists before changing anything, so that a null spectrum
    // fails before this object has been modified
    var sourceMZ = spectrum.IonsMZ;
    var sourceIntensity = spectrum.IonsIntensity;

    // Previously the scan number was first copied from the spectrum and then replaced
    // whenever it differed from the override; the result was always the override
    ScanNumber = scanNumberOverride;

    if (ReferenceEquals(sourceMZ, IonsMZ) && ReferenceEquals(sourceIntensity, IonsIntensity))
    {
        // The source shares this object's lists (for example ReplaceData(this, ...));
        // the data is already in place, and clearing the lists would destroy it
        return;
    }

    IonsMZ.Clear();
    IonsIntensity.Clear();

    // Release memory when the existing capacity is more than twice what the new data requires
    if (IonsMZ.Capacity / 2 > sourceMZ.Count)
    {
        IonsMZ.Capacity = sourceMZ.Count;
        IonsIntensity.Capacity = sourceIntensity.Count;
    }

    IonsMZ.AddRange(sourceMZ);
    IonsIntensity.AddRange(sourceIntensity);
}
/// <summary>
/// Determine the baseline noise statistics for a mass spectrum and store them in scanInfo.BaselineNoiseStats
/// </summary>
/// <param name="scanInfo">Scan whose BaselineNoiseStats is populated</param>
/// <param name="msSpectrum">Spectrum whose intensities are examined when the noise mode is not AbsoluteThreshold</param>
/// <param name="noiseThresholdOptions">Noise mode and threshold settings</param>
private void ComputeNoiseLevelForMassSpectrum(
    clsScanInfo scanInfo,
    clsMSSpectrum msSpectrum,
    clsBaselineNoiseOptions noiseThresholdOptions)
{
    const bool ignoreNonPositiveData = true;

    // Start from freshly initialized stats; these remain in effect when the
    // spectrum is empty and the noise mode is not AbsoluteThreshold
    scanInfo.BaselineNoiseStats = clsMASICPeakFinder.InitializeBaselineNoiseStats(
        0, noiseThresholdOptions.BaselineNoiseMode);

    var useAbsoluteThreshold =
        noiseThresholdOptions.BaselineNoiseMode == clsMASICPeakFinder.eNoiseThresholdModes.AbsoluteThreshold;

    if (useAbsoluteThreshold)
    {
        // The noise level comes directly from the options
        scanInfo.BaselineNoiseStats.NoiseLevel = noiseThresholdOptions.BaselineNoiseLevelAbsolute;
        scanInfo.BaselineNoiseStats.PointsUsed = 1;
        return;
    }

    if (msSpectrum.IonCount <= 0)
    {
        return;
    }

    // Compute the noise level from the intensities of every ion in the spectrum
    var lastIonIndex = msSpectrum.IonCount - 1;

    mPeakFinder.ComputeTrimmedNoiseLevel(
        msSpectrum.IonsIntensity,
        0,
        lastIonIndex,
        noiseThresholdOptions,
        ignoreNonPositiveData,
        out var computedNoiseStats);

    scanInfo.BaselineNoiseStats = computedNoiseStats;
}
/// <summary>
/// Create a new spectrum from the scan number and ion lists of an existing spectrum
/// </summary>
/// <param name="sourceSpectrum">Spectrum to copy</param>
/// <returns>Newly constructed spectrum</returns>
/// <remarks>
/// Whether the new spectrum receives copies of the ion lists or shares them with the source
/// is decided by the clsMSSpectrum constructor, which is not shown here
/// </remarks>
public clsMSSpectrum Copy(clsMSSpectrum sourceSpectrum) =>
    new clsMSSpectrum(
        sourceSpectrum.ScanNumber,
        sourceSpectrum.IonsMZ,
        sourceSpectrum.IonsIntensity,
        sourceSpectrum.IonsMZ.Count);
/// <summary>
/// Compute the noise level for a spectrum, optionally filter, compress, and trim it,
/// then add it to the spectrum cache
/// </summary>
/// <param name="scanInfo">Scan metadata; BaselineNoiseStats is updated, and IonCount is updated whenever data is discarded</param>
/// <param name="dataImportUtilities">Provides the routines that discard low intensity data and limit the ion count</param>
/// <param name="spectraCache">Cache that receives the processed spectrum</param>
/// <param name="msSpectrum">Spectrum to process; modified in place</param>
/// <param name="noiseThresholdOptions">Noise threshold settings</param>
/// <param name="discardLowIntensityData">When true, data below the noise threshold is discarded (never for MRM scans)</param>
/// <param name="compressData">When true, the spectrum is compressed using msDataResolution</param>
/// <param name="msDataResolution">Resolution (in m/z units) used when compressing the data</param>
/// <param name="keepRawSpectrum">When false, only the noise level is computed and the spectrum is not stored</param>
/// <returns>
/// True if keepRawSpectrum is false; otherwise the value returned by spectraCache.AddSpectrumToPool;
/// false if an exception occurs
/// </returns>
public bool ProcessAndStoreSpectrum(
    clsScanInfo scanInfo,
    DataInput.clsDataImport dataImportUtilities,
    clsSpectraCache spectraCache,
    clsMSSpectrum msSpectrum,
    clsBaselineNoiseOptions noiseThresholdOptions,
    bool discardLowIntensityData,
    bool compressData,
    double msDataResolution,
    bool keepRawSpectrum)
{
    // Breadcrumb included in the error message if an exception occurs
    var currentStep = "Start";

    try
    {
        // The noise threshold intensity for this spectrum is stored in scanInfo.BaselineNoiseStats
        currentStep = "Call ComputeNoiseLevelForMassSpectrum";
        ComputeNoiseLevelForMassSpectrum(scanInfo, msSpectrum, noiseThresholdOptions);

        if (!keepRawSpectrum)
        {
            return true;
        }

        // Low intensity data is never discarded for MRM scans
        var filterByNoise =
            discardLowIntensityData &&
            scanInfo.MRMScanType == ThermoRawFileReader.MRMScanTypeConstants.NotMRM;

        if (filterByNoise)
        {
            // Remove data below the noise level or below the minimum S/N level
            // The reporter ion m/z range is passed along so that ions in that region are not discarded
            currentStep = "Call DiscardDataBelowNoiseThreshold";
            dataImportUtilities.DiscardDataBelowNoiseThreshold(
                msSpectrum,
                scanInfo.BaselineNoiseStats.NoiseLevel,
                mReporterIons.MZIntensityFilterIgnoreRangeStart,
                mReporterIons.MZIntensityFilterIgnoreRangeEnd,
                noiseThresholdOptions);

            scanInfo.IonCount = msSpectrum.IonCount;
        }

        if (compressData)
        {
            // As above, ions in the reporter ion m/z range must not be discarded
            currentStep = "Call CompressSpectraData";
            CompressSpectraData(
                msSpectrum,
                msDataResolution,
                mReporterIons.MZIntensityFilterIgnoreRangeStart,
                mReporterIons.MZIntensityFilterIgnoreRangeEnd);
        }

        if (msSpectrum.IonCount > MAX_ALLOWABLE_ION_COUNT)
        {
            // Limit the spectrum to MAX_ALLOWABLE_ION_COUNT ions (50,000 according to the original comment)
            currentStep = "Call DiscardDataToLimitIonCount";
            mSpectraFoundExceedingMaxIonCount++;

            // Show a console message for the first 10 oversized spectra,
            // and again each time a new maximum ion count is encountered
            if (mSpectraFoundExceedingMaxIonCount <= 10 || msSpectrum.IonCount > mMaxIonCountReported)
            {
                var notice =
                    "Note: Scan " + scanInfo.ScanNumber + " has " + msSpectrum.IonCount + " ions; " +
                    "will only retain " + MAX_ALLOWABLE_ION_COUNT +
                    " (trimmed " + mSpectraFoundExceedingMaxIonCount.ToString() + " spectra)";

                Console.WriteLine();
                Console.WriteLine(notice);

                mMaxIonCountReported = msSpectrum.IonCount;
            }

            dataImportUtilities.DiscardDataToLimitIonCount(
                msSpectrum,
                mReporterIons.MZIntensityFilterIgnoreRangeStart,
                mReporterIons.MZIntensityFilterIgnoreRangeEnd,
                MAX_ALLOWABLE_ION_COUNT);

            scanInfo.IonCount = msSpectrum.IonCount;
        }

        currentStep = "Call AddSpectrumToPool";
        return spectraCache.AddSpectrumToPool(msSpectrum, scanInfo.ScanNumber);
    }
    catch (Exception ex)
    {
        ReportError(
            "Error in ProcessAndStoreSpectrum (LastKnownLocation: " + currentStep + ")",
            ex,
            clsMASIC.eMasicErrorCodes.InputFileDataReadError);

        return false;
    }
}
/// <summary>
/// Reduce the number of data points in a spectrum, in place, using two passes
/// </summary>
/// <param name="msSpectrum">Spectrum to compress; its ion lists are rewritten and then shortened via ShrinkArrays</param>
/// <param name="msDataResolution">Points closer than this to the first point of a group (in m/z units) are merged into it</param>
/// <param name="mzIgnoreRangeStart">Start of the m/z range in which points are never merged</param>
/// <param name="mzIgnoreRangeEnd">End of the m/z range in which points are never merged</param>
/// <remarks>
/// Pass 1: each run of three or more consecutive points with an intensity below float.Epsilon
/// is reduced to its first and last point
/// Pass 2: each point with a positive intensity is merged with the points that follow it within
/// msDataResolution, keeping the largest intensity and the m/z value of that largest intensity
/// </remarks>
private void CompressSpectraData(
    clsMSSpectrum msSpectrum,
    double msDataResolution,
    double mzIgnoreRangeStart,
    double mzIgnoreRangeEnd)
{
    if (msSpectrum.IonCount <= 1)
    {
        return;
    }

    // Copies one data point (m/z and intensity) from one position to another
    void CopyPoint(int sourceIndex, int destinationIndex)
    {
        msSpectrum.IonsMZ[destinationIndex] = msSpectrum.IonsMZ[sourceIndex];
        msSpectrum.IonsIntensity[destinationIndex] = msSpectrum.IonsIntensity[sourceIndex];
    }

    // ---- Pass 1: collapse runs of zero-intensity points ----
    // writeIndex equals readIndex until the first run is collapsed;
    // from then on it trails readIndex and every retained point must be copied
    var writeIndex = 0;
    var readIndex = 0;

    while (readIndex < msSpectrum.IonCount)
    {
        // Number of additional zero-intensity points that directly follow the point at readIndex
        var trailingZeroCount = 0;

        if (msSpectrum.IonsIntensity[readIndex] < float.Epsilon)
        {
            var scanIndex = readIndex + 1;
            while (scanIndex < msSpectrum.IonCount && msSpectrum.IonsIntensity[scanIndex] < float.Epsilon)
            {
                trailingZeroCount++;
                scanIndex++;
            }
        }

        if (trailingZeroCount > 1)
        {
            // Three or more zeros in a row: retain only the first and the last
            CopyPoint(readIndex, writeIndex);
            writeIndex++;
            CopyPoint(readIndex + trailingZeroCount, writeIndex);
            readIndex += trailingZeroCount;
        }
        else if (writeIndex != readIndex)
        {
            // Non-zero point, or a zero that is not part of a run of three or more: retain as-is
            CopyPoint(readIndex, writeIndex);
        }

        readIndex++;
        writeIndex++;
    }

    // Update the ion count to reflect the retained points
    msSpectrum.ShrinkArrays(writeIndex);

    // ---- Pass 2: merge points that lie within msDataResolution of one another ----
    // The data is again rewritten in place rather than copied to a new array
    writeIndex = 0;
    readIndex = 0;

    while (readIndex < msSpectrum.IonCount)
    {
        var mergedCount = 0;
        var mzOfLargest = msSpectrum.IonsMZ[readIndex];

        // A group can only start at a point with a positive intensity that is outside the ignore range
        var canStartGroup =
            msSpectrum.IonsIntensity[readIndex] > 0 &&
            !clsUtilities.CheckPointInMZIgnoreRange(msSpectrum.IonsMZ[readIndex], mzIgnoreRangeStart, mzIgnoreRangeEnd);

        if (canStartGroup)
        {
            for (var candidateIndex = readIndex + 1; candidateIndex < msSpectrum.IonCount; candidateIndex++)
            {
                if (clsUtilities.CheckPointInMZIgnoreRange(msSpectrum.IonsMZ[candidateIndex], mzIgnoreRangeStart, mzIgnoreRangeEnd))
                {
                    // Points in the ignore range are never merged into the current group
                    break;
                }

                var withinResolution =
                    msSpectrum.IonsMZ[candidateIndex] - msSpectrum.IonsMZ[readIndex] < msDataResolution;

                if (!withinResolution)
                {
                    break;
                }

                if (msSpectrum.IonsIntensity[candidateIndex] > msSpectrum.IonsIntensity[readIndex])
                {
                    // The group's largest intensity is tracked in the slot of its first point
                    msSpectrum.IonsIntensity[readIndex] = msSpectrum.IonsIntensity[candidateIndex];
                    mzOfLargest = msSpectrum.IonsMZ[candidateIndex];
                }

                mergedCount++;
            }
        }

        // Nothing needs to be written while no point has been merged and the indices still agree
        if (writeIndex != readIndex || mergedCount > 0)
        {
            msSpectrum.IonsMZ[writeIndex] = mzOfLargest;
            msSpectrum.IonsIntensity[writeIndex] = msSpectrum.IonsIntensity[readIndex];
            readIndex += mergedCount;
        }

        readIndex++;
        writeIndex++;
    }

    // Update the ion count to reflect the retained points
    msSpectrum.ShrinkArrays(writeIndex);
}