/// <summary>
/// Runs the two-stage peak detection pipeline on raw XY data: candidate peaks are
/// centroided first and the resulting list is then passed through the noise thresholder.
/// </summary>
/// <param name="collectionRawXYData">Raw XY data points to search for peaks.</param>
/// <returns>The thresholded peaks, converted with <c>ProcessedPeak.ToPeaks</c>.</returns>
public override Collection<Peak> DetectPeaks(Collection<XYData> collectionRawXYData)
{
    // Work on a list copy of the incoming collection.
    var profilePoints = new List<XYData>(collectionRawXYData);

    // Stage 1: find candidate peaks in the profile.
    //TODO: Scott Create constructor that accepts parameters.
    var centroider = new PeakCentroider { Parameters = CentroidParameters };
    List<ProcessedPeak> candidatePeaks = centroider.DiscoverPeaks(profilePoints);

    // Stage 2: separate signal from noise.
    //TODO: Scott Create constructor that accepts parameters.
    var thresholder = new PeakThresholder { Parameters = ThresholdParameters };
    List<ProcessedPeak> signalPeaks = thresholder.ApplyThreshold(candidatePeaks);

    // Convert the surviving processed peaks to the output peak type.
    return ProcessedPeak.ToPeaks(signalPeaks);
}
/// <summary>
/// Estimates the full width at half maximum (FWHM) of a peak.
/// Each side of the peak is handled independently. If the shoulder minimum on that side lies
/// below half height, the half-height crossing is linearly interpolated between the two raw
/// points that bracket it. Otherwise (for example because of an overlapping neighbouring peak)
/// the half width is extrapolated: with a parabola fit when more than two points separate the
/// apex from the minimum, or with a straight line through the apex and the minimum when not.
/// </summary>
/// <param name="rawData">Raw data to search in.</param>
/// <param name="centerIndex">Index in <paramref name="rawData"/> of the (non-centroided) peak apex.</param>
/// <param name="centroidPeak">Centroided apex; its X is used as the center position and its Y as the peak height.</param>
/// <param name="shoulderNoiseToLeftIndex">Index of the local minimum to the left of the peak. Only read here.</param>
/// <param name="shoulderNoiseToRightIndex">Index of the local minimum to the right of the peak. Only read here.</param>
/// <param name="lowAbundanceFWHMFitType">Selects how the points are prepared for the parabola fit when a half width must be extrapolated.</param>
/// <returns>
/// The sum of the left and right half widths. Returns 0 when the apex height is 0 or when
/// <paramref name="centerIndex"/> is the first or last index of <paramref name="rawData"/>.
/// When one half width is exactly 0, twice the other half width is returned.
/// </returns>
private double FindFWHM(List<XYData> rawData, int centerIndex, XYData centroidPeak, ref int shoulderNoiseToLeftIndex, ref int shoulderNoiseToRightIndex, PeakFitType lowAbundanceFWHMFitType)
{
    // The shoulder minima bound the points that may be used to determine the FWHM.
    var minimaLeftIndex = shoulderNoiseToLeftIndex;   // index lower in mass
    var minimaRightIndex = shoulderNoiseToRightIndex; // index higher in mass

    // Use the centroided apex for the half-maximum and deltaX calculations.
    var centerHeight = centroidPeak.Y;
    var halfHeight = centerHeight / 2.0;
    var centerMass = centroidPeak.X;

    // Parabola coefficients; the same variables are handed by ref to both fits.
    double A = 0;
    double B = 0;
    double C = 0;

    var detectedMethodLeft = FullWidthHalfMaximumPeakOptions.Unassigned;
    var detectedMethodRight = FullWidthHalfMaximumPeakOptions.Unassigned;

    var numPoints = rawData.Count;

    // No signal at the apex.
    if (centerHeight == 0.0)
    {
        return 0.0;
    }

    // The apex sits on an end of the data, so one side is missing entirely.
    if (centerIndex <= 0 || centerIndex >= numPoints - 1)
    {
        return 0;
    }

    #region look for the half maximum on the right side

    // Start with the largest value the right half width can take: apex to right minimum.
    double deltaXRight = rawData[minimaRightIndex].X - centerMass;

    if (rawData[minimaRightIndex].Y < halfHeight)
    {
        // The right minimum is below half height: walk from the minimum towards the apex
        // until the next point inwards rises above half height, then interpolate.
        for (var i = minimaRightIndex; i > centerIndex; i--)
        {
            double innerY = rawData[i - 1].Y;
            if (innerY > halfHeight)
            {
                // Point i is at or below the half height and point i-1 is above it.
                var interpolatedX = InterpolateHalfHeightX(rawData[i].X, rawData[i].Y, rawData[i - 1].X, innerY, halfHeight);
                deltaXRight = interpolatedX - centerMass;
                detectedMethodRight = FullWidthHalfMaximumPeakOptions.Interpolated;
                break;
            }
        }
    }
    else if (minimaRightIndex - centerIndex > 2)
    {
        // The data never drops to half height on this side, but there are three or more
        // points: fit a parabola and solve it for the (transformed) half height.
        double transformedHalfHeight;
        var peakRightSideList = BuildHalfMaximumFitPoints(rawData, minimaLeftIndex, minimaRightIndex, lowAbundanceFWHMFitType, halfHeight, out transformedHalfHeight);

        var peakTopCalculation = new PeakCentroider();
        peakTopCalculation.ParabolaABC(peakRightSideList, ref A, ref B, ref C);

        var squareRootTest = B * B - 4 * A * C + 4 * A * transformedHalfHeight; // must be positive
        if (squareRootTest > 0)
        {
            deltaXRight = -((B / 2 + Math.Sqrt(squareRootTest) / 2) / A);
        }
        else
        {
            // No real solution: approximate with half of the apex-to-minimum distance.
            deltaXRight = deltaXRight / 2;
        }

        detectedMethodRight = FullWidthHalfMaximumPeakOptions.QuadraticExtrapolation;
    }
    else
    {
        // Too few points for a parabola: project a line through the apex and the right minimum.
        var slope = (centerHeight - rawData[minimaRightIndex].Y) / (centerMass - rawData[minimaRightIndex].X);
        var intercept = centerHeight - slope * centerMass;
        var regressedX = -((intercept - halfHeight) / slope);
        deltaXRight = regressedX - centerMass;
        detectedMethodRight = FullWidthHalfMaximumPeakOptions.LinearExtrapolation;
    }

    #endregion

    #region look for the half maximum on the left side

    // Start with the largest value the left half width can take: left minimum to apex.
    double deltaXLeft = centerMass - rawData[minimaLeftIndex].X;

    if (rawData[minimaLeftIndex].Y < halfHeight)
    {
        // The left minimum is below half height: walk from the minimum towards the apex
        // until the next point inwards rises above half height, then interpolate.
        for (var i = minimaLeftIndex; i < centerIndex; i++)
        {
            double innerY = rawData[i + 1].Y;
            if (innerY > halfHeight)
            {
                // Point i is at or below the half height and point i+1 is above it.
                var interpolatedX = InterpolateHalfHeightX(rawData[i].X, rawData[i].Y, rawData[i + 1].X, innerY, halfHeight);
                deltaXLeft = centerMass - interpolatedX;
                detectedMethodLeft = FullWidthHalfMaximumPeakOptions.Interpolated;
                break;
            }
        }
    }
    else if (centerIndex - minimaLeftIndex > 2)
    {
        double transformedHalfHeight;
        var peakLeftSideList = BuildHalfMaximumFitPoints(rawData, minimaLeftIndex, minimaRightIndex, lowAbundanceFWHMFitType, halfHeight, out transformedHalfHeight);

        var peakTopCalculation = new PeakCentroider();
        peakTopCalculation.ParabolaABC(peakLeftSideList, ref A, ref B, ref C);

        // NOTE(review): this is the same root expression that is used for the right side, and
        // its value is taken directly as the half width. Confirm against the coordinate
        // convention of ParabolaABC that this is the intended solution for the left side.
        var squareRootTest = B * B - 4 * A * C + 4 * A * transformedHalfHeight; // must be positive
        if (squareRootTest > 0)
        {
            deltaXLeft = -((B / 2 + Math.Sqrt(squareRootTest) / 2) / A);
        }
        else
        {
            // No real solution: approximate with half of the minimum-to-apex distance.
            deltaXLeft = deltaXLeft / 2;
        }

        detectedMethodLeft = FullWidthHalfMaximumPeakOptions.QuadraticExtrapolation;
    }
    else
    {
        // Too few points for a parabola: project a line through the apex and the left minimum.
        var slope = (centerHeight - rawData[minimaLeftIndex].Y) / (centerMass - rawData[minimaLeftIndex].X);
        var intercept = centerHeight - slope * centerMass;
        var regressedX = -((intercept - halfHeight) / slope);
        deltaXLeft = centerMass - regressedX;
        detectedMethodLeft = FullWidthHalfMaximumPeakOptions.LinearExtrapolation;
    }

    #endregion

    // If only one half of the peak produced a width, mirror that half.
    if (deltaXRight == 0.0)
    {
        return 2 * deltaXLeft;
    }

    if (deltaXLeft == 0.0)
    {
        // Mirror the right half; doubling the (zero) left half would always report a width of 0.
        return 2 * deltaXRight;
    }

    // A linear extrapolation is the weakest estimate. When the other side was found by
    // interpolation or quadratic extrapolation, use that stronger half width for both sides.
    if (detectedMethodLeft == FullWidthHalfMaximumPeakOptions.LinearExtrapolation)
    {
        if (detectedMethodRight == FullWidthHalfMaximumPeakOptions.QuadraticExtrapolation ||
            detectedMethodRight == FullWidthHalfMaximumPeakOptions.Interpolated)
        {
            deltaXLeft = deltaXRight;
        }
    }

    if (detectedMethodRight == FullWidthHalfMaximumPeakOptions.LinearExtrapolation)
    {
        if (detectedMethodLeft == FullWidthHalfMaximumPeakOptions.QuadraticExtrapolation ||
            detectedMethodLeft == FullWidthHalfMaximumPeakOptions.Interpolated)
        {
            deltaXRight = deltaXLeft;
        }
    }

    return deltaXLeft + deltaXRight;
}

/// <summary>
/// Linearly interpolates the X position at which the segment between an outer point (at or
/// below half height) and the adjacent inner point (above half height) crosses the half height.
/// </summary>
private static double InterpolateHalfHeightX(double outerX, double outerY, double innerX, double innerY, double halfHeight)
{
    return outerX - (outerX - innerX) * (halfHeight - outerY) / (innerY - outerY);
}

/// <summary>
/// Collects the raw points from <paramref name="firstIndex"/> to <paramref name="lastIndex"/>
/// (inclusive) for the parabola fit. For <c>PeakFitType.Lorentzian</c> the Y values and the
/// half height are replaced by their base-10 logarithms and points with Y &lt;= 0 are skipped;
/// for every other fit type the points and the half height are used unchanged.
/// </summary>
private static List<XYData> BuildHalfMaximumFitPoints(List<XYData> rawData, int firstIndex, int lastIndex, PeakFitType fitType, double halfHeight, out double transformedHalfHeight)
{
    var fitPoints = new List<XYData>();

    if (fitType == PeakFitType.Lorentzian)
    {
        for (var i = firstIndex; i <= lastIndex; i++)
        {
            // Skip non-positive intensities: their logarithm is not finite.
            if (rawData[i].Y > 0)
            {
                // The log intensity is stored at single precision.
                var logY = (float)(Math.Log10(rawData[i].Y));
                fitPoints.Add(new XYData(rawData[i].X, logY));
            }
        }

        transformedHalfHeight = Math.Log10(halfHeight);
    }
    else
    {
        for (var i = firstIndex; i <= lastIndex; i++)
        {
            fitPoints.Add(new XYData(rawData[i].X, rawData[i].Y));
        }

        transformedHalfHeight = halfHeight;
    }

    return fitPoints;
}
/// <summary>
/// Finds candidate peaks in the spectrum. When <c>Parameters.IsXYDataCentroided</c> is set,
/// every input point becomes a peak with the configured default width. Otherwise each local
/// maximum (intensity increasing and then decreasing) is centroided with a parabola fit and
/// annotated with its shoulder minima, FWHM and signal to noise.
/// </summary>
/// <param name="rawXYData">Raw XY data points. A null or empty list yields an empty result.</param>
/// <returns>The list of processed peaks, in the order they were found.</returns>
public List<ProcessedPeak> DiscoverPeaks(List<XYData> rawXYData)
{
    var resultsListCentroid = new List<ProcessedPeak>();

    if (rawXYData == null || rawXYData.Count == 0)
    {
        return resultsListCentroid;
    }

    var numPoints = rawXYData.Count;

    if (Parameters.IsXYDataCentroided)
    {
        // The data is already centroided: copy each point and attach the default width.
        var width = Convert.ToSingle(Parameters.DefaultFWHMForCentroidedData);
        foreach (var rawData in rawXYData)
        {
            var newPreCentroidedPeak = new ProcessedPeak
            {
                XValue = rawData.X,
                Height = rawData.Y,
                Width = width
            };
            resultsListCentroid.Add(newPreCentroidedPeak);
        }

        return resultsListCentroid;
    }

    //TODO: Assert that the number of points is 3, 5, 7? Throw exception if not odd and greater than 3.
    var pointsInFit = Parameters.NumberOfPoints;

    // Number of points taken on each side of the apex for the parabola fit (1 for 3 points, 2 for 5, ...).
    var halfWindow = (int)(pointsInFit / (float)2 - (float)0.5);

    // Reusable buffer holding the points around the apex that are sent to the parabola fitter.
    var peakTopParabolaPoints = new List<XYData>();
    for (var i = 0; i < pointsInFit; i++)
    {
        peakTopParabolaPoints.Add(new XYData(0, 0));
    }

    for (var i = 1; i < numPoints - 1; i++) // stop one short because i is advanced inside the loop
    {
        // Look for local maxima: keep stepping while the intensity is still increasing.
        //TODO: Refactor?
        while (rawXYData[i].Y > rawXYData[i - 1].Y && i < numPoints - 1)
        {
            // Look at the next point.
            i++;

            if (rawXYData[i].Y < rawXYData[i - 1].Y) // now decreasing: the peak top is at i-1
            {
                var apexIndex = i - 1; // index in the raw data of the (non-centroided) peak top
                var newcentroidPeak = new ProcessedPeak();

                //1. find the local (shoulder) noise from the local minima on each side of the peak
                var shoulderNoiseToLeftIndex = 0;
                var shoulderNoiseToRightIndex = 0;

                var peakTopCalculation = new PeakCentroider();
                newcentroidPeak.LocalLowestMinimaHeight = peakTopCalculation.FindShoulderNoise(ref rawXYData, apexIndex, Parameters.DefaultShoulderNoiseValue, ref shoulderNoiseToLeftIndex, ref shoulderNoiseToRightIndex);

                newcentroidPeak.MinimaOfLowerMassIndex = shoulderNoiseToLeftIndex;
                newcentroidPeak.MinimaOfHigherMassIndex = shoulderNoiseToRightIndex;

                // Higher of the two shoulder minima, floored at 1 so that the signal-to-noise
                // division below never divides by 0. Compared directly rather than through
                // decimal, whose conversion rounds the values and throws OverflowException for
                // NaN, infinite or very large intensities.
                var highestMinimaHeight = Math.Max(rawXYData[shoulderNoiseToLeftIndex].Y, rawXYData[shoulderNoiseToRightIndex].Y);
                newcentroidPeak.LocalHighestMinimaHeight = Math.Max(highestMinimaHeight, 1.0);

                // Decide which flanking point is lower; the true maximum lies closer to the
                // higher one. The stored index is the raw point just left of the parabola apex.
                if (rawXYData[i].Y > rawXYData[i - 2].Y)
                {
                    newcentroidPeak.CenterIndexLeft = i - 1;
                }
                else
                {
                    newcentroidPeak.CenterIndexLeft = i - 2;
                }

                //2. centroid the peak by fitting a parabola through the points around the apex
                //TODO: decide if sending indexes is better becaus the modulariy of the parabola finder will be broken
                var windowStart = apexIndex - halfWindow;
                var windowFits = windowStart >= 0 && windowStart + pointsInFit <= numPoints;

                XYData centroidedPeak;
                if (windowFits)
                {
                    for (var j = 0; j < pointsInFit; j++)
                    {
                        peakTopParabolaPoints[j] = rawXYData[windowStart + j];
                    }

                    // Calculate the parabola apex (centroided position and height).
                    centroidedPeak = peakTopCalculation.Parabola(peakTopParabolaPoints);
                }
                else
                {
                    // The fit window would run past an end of the data (possible when more than
                    // 3 points are requested and the apex is near an edge): use the raw apex.
                    centroidedPeak = rawXYData[apexIndex];
                }

                newcentroidPeak.XValue = centroidedPeak.X;
                newcentroidPeak.Height = centroidedPeak.Y;

                // If the fit fails we simply select the raw apex point. This happens when the
                // y values are very very similar (within the tolerance of a single).
                if (double.IsNaN(newcentroidPeak.Height))
                {
                    // Replace the centroid as well so the FWHM below is not computed from NaN.
                    centroidedPeak = rawXYData[apexIndex];
                    newcentroidPeak.XValue = centroidedPeak.X;
                    newcentroidPeak.Height = centroidedPeak.Y;
                }

                //3. find FWHM
                newcentroidPeak.Width = Convert.ToSingle(peakTopCalculation.FindFWHM(rawXYData, apexIndex, centroidedPeak, ref shoulderNoiseToLeftIndex, ref shoulderNoiseToRightIndex, Parameters.FWHMPeakFitType));

                //4. calculate signal to noise
                newcentroidPeak.SignalToNoiseLocalHighestMinima = newcentroidPeak.Height / newcentroidPeak.LocalHighestMinimaHeight;

                //5. add centroided peak
                resultsListCentroid.Add(newcentroidPeak);
            }
        }
    }

    return resultsListCentroid;
}