// CanSplitRangeFast() decides whether the given range has to be subdivided.
//
// The range is kept as a single segment (the function returns <false>) when
// the linear regression over it already meets the requested tolerance, when
// the range is too short to be divided, or when no suitable split position
// exists. Otherwise the split position is the local maximum of the absolute
// differences between original and smoothed values that has the greatest
// deviation among the candidates lying far enough from both range ends.
//
// Parameters:
//   data_x, data_y  - original dataset
//   vec_devns_in    - absolute differences between original and smoothed values
//   vec_max_ind_in  - positions (indices) of local maxima in vec_devns_in
//   devn_max_user   - limit for the maximum allowed approximation error (tolerance)
//   idx_range_in    - input range to be split if its linear regression is not acceptable
//   idx_split_out   - position of the split point when the function returns <true>,
//                     -1 otherwise
//   lr_params_out   - receives the regression parameters computed for the input range
//                     (filled in by ComputeLinearRegression())
static public bool CanSplitRangeFast
(
    List<double> data_x,
    List<double> data_y,
    List<double> vec_devns_in,
    List<int> vec_max_ind_in,
    double devn_max_user,
    RangeIndex idx_range_in,
    ref int idx_split_out,
    LinearRegressionParams lr_params_out
)
{
    // "no split" is reported through -1 until a valid position is found
    idx_split_out = -1;

    if (vec_devns_in.Count != data_x.Count)
    {
        Console.WriteLine("SLR: size error");
        return false;
    }

    int min_len = RangeLengthMin();
    int len_in = idx_range_in.Length();

    if (len_in < min_len)
    {
        Console.WriteLine("SLR: input range is too small");
        return false;
    }

    // fit a line to the whole input range and measure its approximation error;
    // the error keeps its initial value (double.MaxValue) when the fit
    // returns without reporting an error
    double err_fit = double.MaxValue;
    ComputeLinearRegression(data_x, data_y, idx_range_in, lr_params_out, ref err_fit);

    // the fit is accurate enough: the range stays in one piece
    if (err_fit < devn_max_user)
    {
        return false;
    }

    // indivisible range: shorter than two minimal ranges
    if (len_in < 2 * RangeLengthMin())
    {
        return false;
    }

    if (vec_devns_in.Count == 0)
    {
        return false;
    }

    // a candidate index idx is accepted only when
    //     idx_a + min_len <= idx < idx_b - min_len
    // which keeps the split point away from both ends of the range
    int idx_lo = idx_range_in.idx_a + min_len;
    int idx_hi = idx_range_in.idx_b - min_len;

    // among the accepted candidates pick the local maximum
    // that has the largest deviation
    int idx_best = -1;
    double devn_best = 0.0;

    foreach (int idx_cand in vec_max_ind_in)
    {
        bool inside_window = (idx_cand >= idx_lo) && (idx_cand < idx_hi);
        if (!inside_window)
        {
            continue;
        }

        double devn_cand = vec_devns_in[idx_cand];
        if (devn_cand > devn_best)
        {
            devn_best = devn_cand;
            idx_best = idx_cand;
        }
    }

    // no local maximum qualified inside the given range
    if (idx_best < 0)
    {
        return false;
    }

    // per the original author's note, idx_best == 0 cannot occur here because
    // of the end offset (RANGE_LENGTH_MIN), so idx_best > 0 is a valid result
    idx_split_out = idx_best;
    return true;
}
// ComputeLinearRegression() fits a straight line to the points of a dataset
// that fall in a given range and reports the approximation error of the fit.
//
// Parameters:
//   data_x, data_y  - original dataset
//   idx_range       - semi-open index range [ a , b ) of the points to use
//   lin_regr_out    - receives the coefficients of the linear regression
//   err_appr_out    - receives the approximation error: the maximum absolute
//                     difference between original and approximated values
//
// err_appr_out is left unchanged when the range is shorter than
// RangeLengthMin() or when LinearRegressionParameters() returns <false>.
static public void ComputeLinearRegression
(
    List<double> data_x,
    List<double> data_y,
    RangeIndex idx_range,
    LinearRegressionParams lin_regr_out,
    ref double err_appr_out
)
{
    if (idx_range.Length() < RangeLengthMin())
    {
        Console.WriteLine("SLR error: input range is too small");
        return;
    }

    int first = idx_range.idx_a;
    int last = idx_range.idx_b;   // exclusive upper bound

    double count = idx_range.Length();

    // accumulate the sums required by the regression formulas
    double acc_x = 0.0;
    double acc_y = 0.0;
    double acc_xx = 0.0;
    double acc_xy = 0.0;

    for (int i = first; i < last; ++i)
    {
        double x = data_x[i];
        double y = data_y[i];

        acc_x += x;
        acc_y += y;
        acc_xx += x * x;
        acc_xy += x * y;
    }

    // derive the regression parameters for the range;
    // a failure here is a very unusual case for real data
    bool fit_ok = LinearRegressionParameters(count, acc_x, acc_y, acc_xx, acc_xy, lin_regr_out);
    if (!fit_ok)
    {
        return;
    }

    double intercept = lin_regr_out.coef_a;
    double slope = lin_regr_out.coef_b;

    // approximation error: the largest absolute difference between
    // an original value and the value predicted by the fitted line
    double worst = 0.0;

    for (int i = first; i < last; ++i)
    {
        double predicted = intercept + slope * data_x[i];
        double residual = Math.Abs(data_y[i] - predicted);

        // explicit comparison (not Math.Max) so that a NaN residual is ignored
        worst = (residual > worst) ? residual : worst;
    }

    err_appr_out = worst;
}