/// <summary>
/// Fits a linear regression predicting the dependent values from the independent
/// values, repeatedly discarding the single worst-fitting point and refitting until
/// the correlation reaches MIN_IRT_TO_TIME_CORRELATION or only
/// <paramref name="minPoints"/> points remain.
/// </summary>
/// <remarks>
/// Both lists are modified in place: every discarded point is removed from
/// <paramref name="listIndependent"/> and <paramref name="listDependent"/> at the
/// same index, so on return they contain only the points used for the final fit.
/// Because points are removed with RemoveAt, the lists must support removal
/// whenever more than <paramref name="minPoints"/> points are supplied.
/// </remarks>
/// <param name="listIndependent">Independent (x) values, paired by index with <paramref name="listDependent"/>.</param>
/// <param name="listDependent">Dependent (y) values, paired by index with <paramref name="listIndependent"/>.</param>
/// <param name="minPoints">Minimum number of points required, and the floor below which no further points are discarded.</param>
/// <param name="line">
/// The last regression line fitted, or null when the lists differ in length or hold
/// fewer than <paramref name="minPoints"/> points. It is set even when the method
/// returns false because the correlation threshold was never reached.
/// </param>
/// <returns>True if the final fit has a correlation of at least MIN_IRT_TO_TIME_CORRELATION.</returns>
public static bool TryGetRegressionLine(IList<double> listIndependent, IList<double> listDependent, int minPoints, out RegressionLine line)
{
    line = null;
    if (listIndependent.Count != listDependent.Count || listIndependent.Count < minPoints)
        return false;

    double correlation;
    while (true)
    {
        var statIndependent = new Statistics(listIndependent);
        var statDependent = new Statistics(listDependent);
        line = new RegressionLine(statDependent.Slope(statIndependent), statDependent.Intercept(statIndependent));
        correlation = statDependent.R(statIndependent);

        // Stop when the fit is good enough, or when no more points may be discarded
        if (correlation >= MIN_IRT_TO_TIME_CORRELATION || listIndependent.Count <= minPoints)
            break;

        // Find the point with the largest residual. The line was fitted with the
        // dependent values as a function of the independent values, so the predicted
        // value comes from the independent coordinate and is compared against the
        // observed dependent coordinate.
        var furthest = 0;
        var maxDistance = 0.0;
        for (var i = 0; i < listIndependent.Count; i++)
        {
            var distance = Math.Abs(line.GetY(listIndependent[i]) - listDependent[i]);
            if (distance > maxDistance)
            {
                furthest = i;
                maxDistance = distance;
            }
        }

        // Drop the outlier from both lists to keep them index-aligned
        listIndependent.RemoveAt(furthest);
        listDependent.RemoveAt(furthest);
    }

    return correlation >= MIN_IRT_TO_TIME_CORRELATION;
}
/// <summary>
/// Builds a pane showing the points of <paramref name="graphData"/> as a scatter
/// plot, plus a solid line for its regression and a dashed line for its current
/// regression when either is present. The label text for each line drawn is
/// stored in a field.
/// </summary>
/// <param name="graphData">Title, axis labels, x/y values and optional regression lines to display.</param>
public RegressionGraphPane(RegressionGraphData graphData)
{
    _graphData = graphData;

    // Titles
    Title.Text = graphData.Title;
    Title.IsVisible = true;
    XAxis.Title.Text = graphData.LabelX;
    YAxis.Title.Text = graphData.LabelY;

    // No borders, no secondary axes, no tics on the opposite sides
    Border.IsVisible = false;
    Chart.Border.IsVisible = false;
    X2Axis.IsVisible = false;
    Y2Axis.IsVisible = false;
    XAxis.MajorTic.IsOpposite = false;
    XAxis.MinorTic.IsOpposite = false;
    YAxis.MajorTic.IsOpposite = false;
    YAxis.MinorTic.IsOpposite = false;

    // Automatic scaling on both axes, with some head room above the points
    XAxis.Scale.MinAuto = true;
    XAxis.Scale.MaxAuto = true;
    YAxis.Scale.MinAuto = true;
    YAxis.Scale.MaxAuto = true;
    YAxis.Scale.MaxGrace = 0.1;
    IsFontsScaled = false;

    // The data points themselves: filled diamonds with no connecting line
    var xValues = graphData.XValues;
    var yValues = graphData.YValues;
    var curveValues = AddCurve(Resources.RegressionGraphPane_RegressionGraphPane_Values,
                               xValues, yValues, Color.Black, SymbolType.Diamond);
    curveValues.Line.IsVisible = false;
    curveValues.Symbol.Border.IsVisible = false;
    curveValues.Symbol.Fill = new Fill(COLOR_REGRESSION);

    // Locate the points with the smallest and largest x values, which become
    // the end points of any regression line drawn below.
    double minX = double.MaxValue, yAtMinX = double.MaxValue;
    double maxX = double.MinValue, yAtMaxX = double.MinValue;
    for (int iPoint = 0; iPoint < xValues.Length; iPoint++)
    {
        double x = xValues[iPoint];
        double y = yValues[iPoint];
        if (x < minX)
        {
            minX = x;
            yAtMinX = y;
        }
        if (x > maxX)
        {
            maxX = x;
            yAtMaxX = y;
        }
    }
    var lineX = new[] { minX, maxX };
    var lineY = new[] { yAtMinX, yAtMaxX };

    var regressionLine = graphData.RegressionLine;
    if (regressionLine != null)
    {
        // Replace the end point y values with those predicted by the regression
        lineY[0] = regressionLine.GetY(lineX[0]);
        lineY[1] = regressionLine.GetY(lineX[1]);

        var curveRegression = AddCurve(Resources.RegressionGraphPane_RegressionGraphPane_Regression,
                                       lineX, lineY, COLOR_LINE_REGRESSION);
        curveRegression.Line.IsAntiAlias = true;
        curveRegression.Line.IsOptimizedDraw = true;

        // The label reports a fit calculated from the plotted values
        var statsX = new Statistics(_graphData.XValues);
        var statsY = new Statistics(_graphData.YValues);
        double slope = statsY.Slope(statsX);
        double intercept = statsY.Intercept(statsX);
        double r = statsY.R(statsX);

        _labelRegression = string.Format("{0} = {1:F04}, {2} = {3:F04}\n" + "r = {4:F02}", // Not L10N
                                         Resources.Regression_slope,
                                         slope,
                                         Resources.Regression_intercept,
                                         intercept,
                                         r);
    }

    var regressionLineCurrent = graphData.RegressionLineCurrent;
    if (regressionLineCurrent != null)
    {
        // Same end point x values, with y values from the current regression
        lineY[0] = regressionLineCurrent.GetY(lineX[0]);
        lineY[1] = regressionLineCurrent.GetY(lineX[1]);

        var curveCurrent = AddCurve(Resources.RegressionGraphPane_RegressionGraphPane_Current,
                                    lineX, lineY, COLOR_LINE_REGRESSION_CURRENT);
        curveCurrent.Line.IsAntiAlias = true;
        curveCurrent.Line.IsOptimizedDraw = true;
        curveCurrent.Line.Style = DashStyle.Dash;

        _labelRegressionCurrent = string.Format("{0} = {1:F04}, {2} = {3:F04}", // Not L10N
                                                Resources.Regression_slope,
                                                regressionLineCurrent.Slope,
                                                Resources.Regression_intercept,
                                                regressionLineCurrent.Intercept);
    }
}
/// <summary>
/// Do initial grouping and ranking of peaks using the Crawdad
/// peak detector.
/// </summary>
/// <remarks>
/// The resulting peak sets are stored in _listPeakSets, which must be empty on
/// entry. Peak sets judged to be noise by a rank-versus-area regression are
/// removed, the remaining peaks are extended, and the list is sorted with
/// ComparePeakLists.
/// </remarks>
/// <param name="retentionTimes">
/// Peptide ID retention times. When empty, at most the 20 most intense peak
/// regions are inspected; otherwise inspection continues past 20 for peaks
/// flagged as identified.
/// </param>
/// <param name="isAlignedTimes">
/// Passed through to the identification and peak extension steps together with
/// <paramref name="retentionTimes"/>.
/// </param>
public void PickChromatogramPeaks(double[] retentionTimes, bool isAlignedTimes)
{
    // Make sure chromatograms are in sorted order
    _listChromData.Sort((c1, c2) => c1.Key.CompareTo(c2.Key));

    // Mark all optimization chromatograms
    MarkOptimizationData();

//    if (Math.Round(_listChromData[0].Key.Precursor) == 585)
//        Console.WriteLine("Issue");

    // First use Crawdad to find the peaks
    // If any chromatograms have an associated transition, then only find peaks
    // in chromatograms with transitions.  It is too confusing to the user to
    // score peaks based on chromatograms for hidden transitions.
    bool hasDocNode = _listChromData.Any(chromData => chromData.DocNode != null);
    _listChromData.ForEach(chromData => chromData.FindPeaks(retentionTimes,
        // But only for fragment ions to allow hidden MS1 isotopes to participate
        hasDocNode && chromData.Key.Source == ChromSource.fragment));

    RemoveProductConflictsByTime(retentionTimes);

    // Merge sort all peaks into a single list
    IList<ChromDataPeak> allPeaks = SplitMS(MergePeaks());

    // Inspect 20 most intense peak regions
    var listRank = new List<double>();
    Assume.IsTrue(_listPeakSets.Count == 0);
    for (int i = 0; i < 20 || retentionTimes.Length > 0; i++)
    {
        if (allPeaks.Count == 0)
            break;

        ChromDataPeak peak = allPeaks[0];
        allPeaks.RemoveAt(0);

        // If peptide ID retention times are present, allow
        // peaks greater than 20, but only if they contain
        // an ID retention time.
        if (i >= 20 && !peak.Peak.Identified)
            continue;

        ChromDataPeakList peakSet = FindCoelutingPeaks(peak, allPeaks);
        peakSet.SetIdentified(retentionTimes, isAlignedTimes);

        _listPeakSets.Add(peakSet);
        // The rank is the order in which the peak was inspected
        listRank.Add(i);
    }

    if (_listPeakSets.Count == 0)
        return;

    // Sort by total area descending
    _listPeakSets.Sort((p1, p2) => Comparer<double>.Default.Compare(p2.TotalArea, p1.TotalArea));

    // The peak will be a significant spike above the norm for this
    // data.  Find a cut-off by removing peaks until the remaining
    // peaks correlate well in a linear regression.
    var listAreas = _listPeakSets.ConvertAll(set => set.TotalArea);
    // Always keep at least MINIMUM_PEAKS peaks
    listRank.RemoveRange(0, Math.Min(MINIMUM_PEAKS, listRank.Count));
    listAreas.RemoveRange(0, Math.Min(MINIMUM_PEAKS, listAreas.Count));
    Assume.IsTrue(listRank.Count == listAreas.Count);

    // Index of the first peak set considered noise; zero means no cut-off was found
    int iRemove = 0;
    // Keep all peaks for summary chromatograms
    if (PrecursorMz != 0)
    {
        // And there must be at least 5 peaks in the line to qualify for removal
        for (int i = 0, len = listAreas.Count; i < len - 4; i++)
        {
            var statsRank = new Statistics(listRank);
            var statsArea = new Statistics(listAreas);
            double rvalue = statsArea.R(statsRank);
            // Console.WriteLine("i = {0}, r = {1}", i, rvalue);
            if (Math.Abs(rvalue) > NOISE_CORRELATION_THRESHOLD)
            {
                iRemove = i + MINIMUM_PEAKS;
                break;
            }
            // Drop the largest remaining peak and try again
            listRank.RemoveAt(0);
            listAreas.RemoveAt(0);
        }
    }

    if (iRemove == 0)
    {
        // No cut-off found, so keep everything
        iRemove = _listPeakSets.Count;
        if (retentionTimes.Length == 0)
        {
            // Backward compatibility: before peptide IDs were integrated
            // this sorting happened before peaks were extended.
            _listPeakSets.Sort(ComparePeakLists);
        }
    }
    else if (retentionTimes.Length == 0)
    {
        // Be sure not to remove anything with a higher combined score than
        // what happen to look visually like the biggest peaks.
        double minKeepScore = _listPeakSets.Take(iRemove).Min(peakSet => peakSet.CombinedScore);

        // Backward compatibility: before peptide IDs were integrated
        // this sorting happened before peaks were extended.
        _listPeakSets.Sort(ComparePeakLists);

        iRemove = Math.Max(iRemove, _listPeakSets.IndexOf(peakSet => peakSet.CombinedScore == minKeepScore));
    }
    else
    {
        // Make sure no identified peaks are removed.
        int identIndex = _listPeakSets.LastIndexOf(peakSet => peakSet.IsIdentified);
        if (identIndex >= iRemove)
            iRemove = identIndex + 1;
        // Or, if there were identifications, but no peaks
        // that appear to contain them, keep all peak sets
        else if (identIndex == -1)
            iRemove = _listPeakSets.Count;
    }

    RemoveNonOverlappingPeaks(_listPeakSets, iRemove);

    // Since Crawdad can have a tendency to pick peaks too narrow,
    // use the peak group information to extend the peaks to make
    // them wider.
    // This does not handle reintegration, because peaks get reintegrated
    // before they are stored, taking the entire peptide into account.
    _listPeakSets = ExtendPeaks(_listPeakSets, retentionTimes, isAlignedTimes);

    // Sort by whether a peak contains an ID and then product score
    // This has to be done after peak extending, since extending may
    // change the state of whether the peak contains an ID.
    if (retentionTimes.Length > 0)
        _listPeakSets.Sort(ComparePeakLists);

//    if (retentionTimes.Length > 0 && !_listPeakSets[0].Identified)
//        Console.WriteLine("Identifications outside peaks.");
}