/// <summary>
/// Find and identify characteristics of peaks in raw XYData. This includes finding
/// candidate centroid peaks and noise thresholding.
/// </summary>
/// <param name="collectionRawXYData">Raw profile XY data.</param>
/// <returns>Centroided peaks with noise removed.</returns>
public override Collection<Peak> DetectPeaks(Collection<XYData> collectionRawXYData)
{
    var rawXYData = new List<XYData>(collectionRawXYData);

    // Find candidate peaks in the profile.
    //TODO: Scott Create constructor that accepts parameters.
    var newPeakCentroider = new PeakCentroider { Parameters = CentroidParameters };
    var centroidedPeakList = newPeakCentroider.DiscoverPeaks(rawXYData);

    // Separate signal from noise.
    //TODO: Scott Create constructor that accepts parameters.
    var newPeakThresholder = new PeakThresholder { Parameters = ThresholdParameters };
    var thresholdedData = newPeakThresholder.ApplyThreshold(centroidedPeakList);

    // Convert the surviving ProcessedPeaks to the public Peak type.
    return ProcessedPeak.ToPeaks(thresholdedData);
}
/// <summary>
/// For each supporting peak, records a ProcessedPeak in the analysis results whose
/// supporting set is the original peak plus all other supporting peaks (i.e. the
/// support set "mirrored" from that peak's point of view).
/// </summary>
/// <param name="id">Sample identifier of the peak being mirrored.</param>
/// <param name="chr">Chromosome name the peaks belong to.</param>
/// <param name="p">The peak whose supporters are being processed.</param>
/// <param name="supportingPeaks">Peaks from other samples that support <paramref name="p"/>.</param>
/// <param name="xsqrd">Combined X-squared statistic to attach to each result.</param>
/// <param name="attribute">Classification to apply (e.g. Confirmed or Discarded).</param>
/// <param name="message">Reason code recorded on each result.</param>
private void ProcessSupportingPeaks(uint id, string chr, I p, List<SupportingPeak<I>> supportingPeaks, double xsqrd, Attributes attribute, Messages.Codes message)
{
    foreach (var supPeak in supportingPeaks)
    {
        // Build supPeak's view of the support set: p itself plus every
        // supporting peak other than supPeak.
        var mirroredSupport = new List<SupportingPeak<I>> { new SupportingPeak<I>(p, id) };
        foreach (var candidate in supportingPeaks)
        {
            if (supPeak.CompareTo(candidate) == 0)
                continue;
            mirroredSupport.Add(candidate);
        }

        var processed = new ProcessedPeak<I>(supPeak.Source, xsqrd, mirroredSupport);
        processed.Classification.Add(attribute);
        processed.reason = message;
        // Peaks at or below the stringent threshold are stringent; otherwise weak.
        processed.Classification.Add(
            supPeak.Source.Value <= _config.TauS ? Attributes.Stringent : Attributes.Weak);

        _analysisResults[supPeak.SampleID].Chromosomes[chr].AddOrUpdate(processed);
    }
}
public void CompareTwoEqualInstances(double xSquared)
{
    // Arrange: two ProcessedPeak instances built from the exact same source
    // peak, statistic, and supporting set must compare as equal.
    var source = new Peak(
        left: 10,
        summit: 15,
        right: 20,
        name: "MSPC_Peak",
        value: 100);

    var supporting = new List<SupportingPeak<Peak>>
    {
        new SupportingPeak<Peak>(
            new Peak(
                left: 5,
                right: 25,
                summit: 15,
                name: "MSPC_SupPeak",
                value: 123),
            1)
    };

    var first = new ProcessedPeak<Peak>(source, xSquared, supporting);
    var second = new ProcessedPeak<Peak>(source, xSquared, supporting);

    // Act
    var areEqual = first.Equals(second);

    // Assert
    Assert.True(areEqual);
}
public void IfConfirmedAndDiscardedThenKeepOnlyConfirmed()
{
    // Arrange: two peaks over the same interval, one discarded and one confirmed.
    var sets = new Sets<Peak>(2, ReplicateType.Biological);

    var discarded = new ProcessedPeak<Peak>(
        new Peak(1, 10, 100), 10, new List<SupportingPeak<Peak>>());
    discarded.Classification.Add(Attributes.Discarded);

    var confirmed = new ProcessedPeak<Peak>(
        new Peak(1, 10, 100), 10, new List<SupportingPeak<Peak>>());
    confirmed.Classification.Add(Attributes.Confirmed);

    // Act: add the discarded peak first, then the confirmed one.
    sets.AddOrUpdate(discarded);
    sets.AddOrUpdate(confirmed);

    // Assert: the confirmed classification wins; the discarded one is dropped.
    Assert.True(sets.Get(Attributes.Confirmed).Any());
    Assert.False(sets.Get(Attributes.Discarded).Any());
}
/// <summary>
/// Orders two processed peaks: nulls sort before non-nulls, then by the source
/// peak's Value, then Left coordinate, then Right coordinate.
/// </summary>
/// <param name="x">First peak (may be null).</param>
/// <param name="y">Second peak (may be null).</param>
/// <returns>Negative if x sorts before y, zero if equal, positive otherwise.</returns>
public int Compare(ProcessedPeak<I> x, ProcessedPeak<I> y)
{
    // Null handling: two nulls are equal; a null sorts before any peak.
    if (x == null)
        return y == null ? 0 : -1;
    if (y == null)
        return 1;

    // Primary key: source value. The inequality pre-check mirrors the original
    // comparer so values that are equal (or both NaN) fall through.
    if (x.Source.Value != y.Source.Value)
        return x.Source.Value.CompareTo(y.Source.Value);

    // Secondary key: left coordinate.
    if (x.Source.Left != y.Source.Left)
        return x.Source.Left.CompareTo(y.Source.Left);

    // Final tie-breaker: right coordinate.
    return x.Source.Right.CompareTo(y.Source.Right);
}
/// <summary>
/// Find candidate peaks in the spectra (increasing and then decreasing). For each peak top, find the centroid.
/// </summary>
/// <param name="rawXYData">List of PNNL Omics XYData (profile data, or pre-centroided points)</param>
/// <returns>List of centroided ProcessedPeaks; empty when the input is null or empty</returns>
public List<ProcessedPeak> DiscoverPeaks(List<XYData> rawXYData)
{
    var resultsListCentroid = new List<ProcessedPeak>();

    // Guard: nothing to discover in a null or empty spectrum.
    if (rawXYData == null || rawXYData.Count == 0)
    {
        return(resultsListCentroid);
    }

    var numPoints = rawXYData.Count;

    if (Parameters.IsXYDataCentroided)
    {
        // Data is already centroided: copy each point into a ProcessedPeak,
        // assigning a configured default width since no FWHM can be measured.
        var width = Convert.ToSingle(Parameters.DefaultFWHMForCentroidedData);
        foreach (var rawData in rawXYData)
        {
            var newPreCentroidedPeak = new ProcessedPeak();
            newPreCentroidedPeak.XValue = rawData.X;
            newPreCentroidedPeak.Height = rawData.Y;
            newPreCentroidedPeak.Width = width;
            resultsListCentroid.Add(newPreCentroidedPeak);
        }
    }
    else
    {
        // Holds the window of points handed to the parabola fit (apex neighborhood).
        var peakTopParabolaPoints = new List<XYData>();

        //TODO: Assert that the number of points is 3, 5, 7? Throw exception if not odd and greater than 3.
        for (var i = 0; i < Parameters.NumberOfPoints; i++)//number of points must be 3,5,7
        {
            var newPoint = new XYData(0, 0);
            peakTopParabolaPoints.Add(newPoint);
        }

        var centroidedPeak = new XYData(0, 0);

        for (var i = 1; i < numPoints - 1; i++)//numPoints-1 because of possible overrun error 4 lines down i+=1
        {
            // This loop looks for local differential maxima.
            // NOTE: the inner while loop advances the OUTER index i past each
            // rising edge; a peak top is detected when the signal turns downward.
            //TODO: Refactor?
            while (rawXYData[i].Y > rawXYData[i - 1].Y && i < numPoints - 1) //Is it Still Increasing?
            {
                // Look at next point.
                i++;
                if (rawXYData[i].Y < rawXYData[i - 1].Y) // Is it Decreasing?
                {
                    //peak top data point is at location i-1
                    var newcentroidPeak = new ProcessedPeak();

                    //1. find local noise (or shoulder noise) by finding the average of the local minima on each side of the peak
                    //XYData storeMinimaDataIndex = new XYData();//would contain the index of the locations where the surrounding local minima are
                    var shoulderNoiseToLeftIndex = 0;
                    var shoulderNoiseToRightIndex = 0;
                    var peakTopCalculation = new PeakCentroider();
                    newcentroidPeak.LocalLowestMinimaHeight = peakTopCalculation.FindShoulderNoise(ref rawXYData, i - 1, Parameters.DefaultShoulderNoiseValue, ref shoulderNoiseToLeftIndex, ref shoulderNoiseToRightIndex);
                    newcentroidPeak.MinimaOfLowerMassIndex = shoulderNoiseToLeftIndex;
                    newcentroidPeak.MinimaOfHigherMassIndex = shoulderNoiseToRightIndex;
                    // Higher of the two flanking minima; the decimal cast presumably
                    // avoids float/double comparison artifacts — TODO confirm.
                    newcentroidPeak.LocalHighestMinimaHeight = Convert.ToDouble(Math.Max((decimal)rawXYData[shoulderNoiseToLeftIndex].Y, (decimal)rawXYData[shoulderNoiseToRightIndex].Y));
                    newcentroidPeak.LocalHighestMinimaHeight = Convert.ToDouble(Math.Max((decimal)newcentroidPeak.LocalHighestMinimaHeight, 1)); //takes care of the 0 condition

                    if (rawXYData[i].Y > rawXYData[i - 2].Y) //decide which flanking point is lower. the higher will have the max closer to it. i-1 is the max point
                    {
                        newcentroidPeak.CenterIndexLeft = i - 1; //this is interesting because we always return the point just to the left of the parabola apex
                    }
                    else
                    {
                        newcentroidPeak.CenterIndexLeft = i - 2;
                    }

                    //2. centroid peaks via fitting a parabola
                    //TODO: decide if sending indexes is better because the modularity of the parabola finder will be broken
                    //store points to go to the parabola fitter
                    for (var j = 0; j < Parameters.NumberOfPoints; j += 1)
                    {
                        var index = i - 1 - (int)(Parameters.NumberOfPoints / (float)2 - (float)0.5) + j;//since number of points is 3,5,7 it will divide nicely
                        peakTopParabolaPoints[j] = rawXYData[index];
                    }

                    //calculate parabola apex returning the centroided X (m/z) and Y (height)
                    centroidedPeak = peakTopCalculation.Parabola(peakTopParabolaPoints);
                    newcentroidPeak.XValue = centroidedPeak.X;
                    newcentroidPeak.Height = centroidedPeak.Y;

                    //if the fit fails, we simply select the center point. This fails when the three y values are very very similar (within the tolerance of a single)
                    if (double.IsNaN(newcentroidPeak.Height))
                    {
                        newcentroidPeak.XValue = peakTopParabolaPoints[1].X;
                        newcentroidPeak.Height = peakTopParabolaPoints[1].Y;
                    }
                    //if(double.IsPositiveInfinity(newcentroidPeak.XValue) || double.IsNegativeInfinity(newcentroidPeak.XValue) )

                    //3. find FWHM
                    var centerIndex = i - 1;//this is the index in the raw data for the peak top (non centroided)
                    newcentroidPeak.Width = Convert.ToSingle(peakTopCalculation.FindFWHM(rawXYData, centerIndex, centroidedPeak, ref shoulderNoiseToLeftIndex, ref shoulderNoiseToRightIndex, Parameters.FWHMPeakFitType));

                    //4. calculate signal to noise against the higher flanking minimum
                    newcentroidPeak.SignalToNoiseLocalHighestMinima = newcentroidPeak.Height / newcentroidPeak.LocalHighestMinimaHeight;

                    //5. add centroided peak
                    resultsListCentroid.Add(newcentroidPeak);
                }
            }
        }
    }
    return(resultsListCentroid);//Peak Centroid
}
/// <summary>
/// Classifies every peak on the given chromosome for one sample: background,
/// then (for stringent/weak peaks) confirmed or discarded based on supporting
/// peaks from other samples and the combined X-squared statistic.
/// </summary>
/// <param name="sampleKey">Key of the sample being processed.</param>
/// <param name="chr">Chromosome name paired with its interval data.</param>
private void ProcessChr(uint sampleKey, KeyValuePair<string, Chromosome<I, BedStats>> chr)
{
    foreach (var strand in chr.Value.Strands)
    {
        foreach (I peak in strand.Value.Intervals)
        {
            // Bail out promptly if the user cancelled the analysis.
            if (_worker.CancellationPending)
                return;

            // Classify by p-value threshold; anything past both thresholds
            // is background and needs no further processing.
            Attributes attribute;
            if (peak.Value < _config.TauS)
            {
                attribute = Attributes.Stringent;
            }
            else if (peak.Value < _config.TauW)
            {
                attribute = Attributes.Weak;
            }
            else
            {
                var background = new ProcessedPeak<I>(peak, double.NaN, new List<SupportingPeak<I>>());
                background.Classification.Add(Attributes.Background);
                _analysisResults[sampleKey].Chromosomes[chr.Key].AddOrUpdate(background);
                continue;
            }

            var supporters = FindSupportingPeaks(sampleKey, chr.Key, peak);

            // Too few replicates agree: discard without computing the statistic.
            if (supporters.Count + 1 < _config.C)
            {
                var underSupported = new ProcessedPeak<I>(peak, 0, supporters);
                underSupported.Classification.Add(attribute);
                underSupported.Classification.Add(Attributes.Discarded);
                underSupported.reason = Messages.Codes.M002;
                _analysisResults[sampleKey].Chromosomes[chr.Key].AddOrUpdate(underSupported);
                continue;
            }

            // Enough support: combine evidence and test against chi-squared.
            double xsqrd = CalculateXsqrd(peak, supporters);
            var combined = new ProcessedPeak<I>(peak, xsqrd, supporters);
            combined.Classification.Add(attribute);

            if (xsqrd >= _cachedChiSqrd[supporters.Count])
            {
                combined.Classification.Add(Attributes.Confirmed);
                _analysisResults[sampleKey].Chromosomes[chr.Key].AddOrUpdate(combined);
                ProcessSupportingPeaks(
                    sampleKey, chr.Key, peak, supporters, xsqrd,
                    Attributes.Confirmed, Messages.Codes.M000);
            }
            else
            {
                combined.reason = Messages.Codes.M001;
                combined.Classification.Add(Attributes.Discarded);
                _analysisResults[sampleKey].Chromosomes[chr.Key].AddOrUpdate(combined);
                ProcessSupportingPeaks(
                    sampleKey, chr.Key, peak, supporters, xsqrd,
                    Attributes.Discarded, Messages.Codes.M001);
            }
        }
    }
}
/// <summary>
/// Calculate the mean of the noise (aka mean of the data), then the standard deviation
/// of the noise; peaks below X*sigma + mean are rejected.
/// FWHM thresholding is not proving to be useful yet.
/// </summary>
/// <param name="peakList">input peaks we want to threshold</param>
/// <returns>peaks that pass the intensity threshold, or all peaks when the data is already thresholded</returns>
public List<ProcessedPeak> ApplyThreshold(List<ProcessedPeak> peakList)
{
    var resultListThresholded = new List<ProcessedPeak>();

    // Guard: nothing to threshold.
    if (peakList == null || peakList.Count == 0)
    {
        return(resultListThresholded);
    }

    var numPoints = peakList.Count;
    double signaltoShoulder = 0;
    double signaltoBackground = 0;
    double signaltoNoise = 0;
    double thresholdIntensity = 0;

    if (!Parameters.isDataThresholded)
    {
        #region calculate average noise value and average shoulderNoiseLeve = baseline
        double averageShoulderNoise = 0;   //average of higher minima
        double averagePeakNoise = 0;       //average of all data. useful if noise dominates
        double averageBackgroundNoise = 0; //average of lower minima = baseline
        double averageNoise = 0;           //average between the lower and higher minima. this means that half the minima are higher and half the minima are lower. should also work well on large numbers of points
        for (var i = 0; i < numPoints; i++)
        {
            //averageShoulderNoise += peakShoulderNoise[i];
            //averagePeakNoise += peakData[i].Y;
            averageShoulderNoise += peakList[i].LocalHighestMinimaHeight;
            averageBackgroundNoise += peakList[i].LocalLowestMinimaHeight;
            averagePeakNoise += peakList[i].Height;
            averageNoise += (peakList[i].LocalHighestMinimaHeight + peakList[i].LocalLowestMinimaHeight) / 2;//this is pretty nice
        }
        #endregion

        averageShoulderNoise /= numPoints;   //worst case scenario
        averageBackgroundNoise /= numPoints; //average background or baseline
        averagePeakNoise /= numPoints;       //works if the noise dominates the spectra
        averageNoise /= numPoints;           //good depiction of the overall background of the data

        #region calculate standard deviation
        double stdevSumDeviationsSquared;
        double standardDevAllSignal;
        double MAD;
        double stdevMAD;
        CalculateDeviation(peakList, numPoints, averageNoise, out stdevSumDeviationsSquared, out standardDevAllSignal, out MAD, out stdevMAD);
        #endregion

        // NOTE(review): the stdevMAD and standardDevAllSignal values returned by
        // CalculateDeviation are overwritten immediately below, so only its MAD and
        // stdevSumDeviationsSquared outputs are actually used — confirm this is intended.
        stdevMAD = MAD * 1.4826; // 1.4826 scales MAD to a stdev estimate under normality
        standardDevAllSignal = Math.Sqrt(stdevSumDeviationsSquared / (numPoints - 1));
        // NOTE(review): standardDevAllSignal is never read after this point.

        for (var i = 0; i < numPoints; i++)
        {
            var thresholdedPeak = new ProcessedPeak();
            signaltoShoulder = peakList[i].Height / peakList[i].LocalHighestMinimaHeight;
            signaltoBackground = peakList[i].Height / averageBackgroundNoise;
            signaltoNoise = peakList[i].Height / averagePeakNoise;
            //thresholdIntensity = Parameters.SignalToShoulderCuttoff * stdevMAD + averagePeakNoise;//average peak noise is too high
            thresholdIntensity = Parameters.SignalToShoulderCuttoff * stdevMAD + averageNoise;//average noise is nice here
            if (peakList[i].Height >= thresholdIntensity)
            {
                //include high abundant peaks
                // NOTE(review): this assignment aliases the input element (the new
                // ProcessedPeak above is discarded), so the setters below mutate
                // the items in peakList — confirm callers expect that.
                thresholdedPeak = peakList[i];
                thresholdedPeak.SignalToNoiseGlobal = signaltoNoise;
                thresholdedPeak.SignalToNoiseLocalHighestMinima = signaltoShoulder;
                thresholdedPeak.SignalToBackground = signaltoBackground;
                resultListThresholded.Add(thresholdedPeak);// parameters.ThresholdedPeakData.Add(thresholdedPeak);
            }
        }

        //TODO: possible FWHM filtering
        ////now that we have a global threshold, repeat filter by FWHM for the most abundant peaks so we can redraw the threshold line
        //convert to similar triangles
        //bool shouldWeThresholdByFWHM = false;
        //if (shouldWeThresholdByFWHM)
        //{
        //    #region peakfiltering by FWHM
        //    //List<XYData> sortedPeaks = new List<XYData>();
        //    parameters.ThresholdedObjectlist = parameters.ThresholdedObjectlist.OrderBy(p => p.PeakData.Y).ToList();
        //    //parameters.ThresholdedPeakData = sortedPeaks;
        //    double topPeaksFactor = 0.25;//how much of the list do we to use for our average FWHM. 0.5 means use top half of peaks above threshold
        //    double averageFWHM = 0;
        //    //calculate average
        //    for (int i = 0; i < (int)(parameters.ThresholdedObjectlist.Count * topPeaksFactor); i++)
        //    {
        //        averageFWHM += parameters.ThresholdedObjectlist[i].PeakFWHM;
        //    }
        //    averageFWHM /= parameters.ThresholdedObjectlist.Count;
        //    stdevSumDeviationsSquared = 0;
        //    //calculate standard deviation
        //    for (int i = 0; i < parameters.ThresholdedObjectlist.Count; i++)
        //    {
        //        stdevDeviations = (parameters.ThresholdedObjectlist[i].PeakFWHM - averageFWHM);
        //        stdevDeviationsSquared = stdevDeviations * stdevDeviations;
        //        stdevSumDeviationsSquared += stdevDeviationsSquared;
        //    }
        //    double standardFWHM = Math.Sqrt(stdevSumDeviationsSquared / (parameters.ThresholdedObjectlist.Count - 1));
        //    int y = 6;
        //    y = y * (int)standardFWHM;
        //    #endregion
        //}
    }
    else//add all peaks since the data is thresholded already or setup some sort of other cutoff
    {
        for (var i = 0; i < numPoints; i++)
        {
            //include all peaks
            var thresholdedPeak = new ProcessedPeak();
            thresholdedPeak = peakList[i];
            resultListThresholded.Add(thresholdedPeak);// parameters.ThresholdedPeakData.Add(thresholdedPeak);
        }
    }
    return(resultListThresholded);
}