Example #1
0
        //  the function CanSplitRangeFast()
        //  decides whether a given range should be split or not ;
        //
        //  a given range is not subdivided if the specified accuracy of
        //  linear regression has been achieved ; otherwise,
        //  the function selects as the split point the position of
        //  the greatest local maximum of absolute differences
        //  between original and smoothed values in the given range ;
        //
        static public bool CanSplitRangeFast
        (
            //  original dataset ; data_x and data_y must have the same size
            List <double> data_x,
            List <double> data_y,
            //  absolute differences between original and smoothed values,
            //  one value per data point (same size as data_x)
            List <double> vec_devns_in,
            //  positions (indices) of local maxima in vec_devns_in
            List <int> vec_max_ind_in,
            //  the limit for maximum allowed approximation error (tolerance)
            double devn_max_user,
            //  input range to be split if linear regression is not acceptable
            RangeIndex idx_range_in,
            //  the position of a split point, when the function returns <true> ;
            //  it is set to -1 whenever the function returns <false>
            ref int idx_split_out,
            //  the parameters of linear regression for the given range,
            //  when the function returns <false> ;
            //  they are not computed when the function returns <false>
            //  because of invalid input (size error, range too small or
            //  range outside of the dataset)
            LinearRegressionParams lr_params_out
        )
        {
            idx_split_out = -1;

            //  all per-point vectors must describe the same number of data points ;
            //  data_y is checked too, since the regression reads data_x and data_y
            //  at the same indices
            if ((vec_devns_in.Count != data_x.Count) ||
                (data_y.Count       != data_x.Count))
            {
                Console.WriteLine("SLR: size error");
                return(false);
            }

            int end_offset = RangeLengthMin();
            int range_len  = idx_range_in.Length();

            if (range_len < end_offset)
            {
                Console.WriteLine("SLR: input range is too small");
                return(false);
            }

            //  the range must lie inside the dataset, otherwise the regression
            //  below would index data_x / data_y out of bounds and throw
            if ((idx_range_in.idx_a < 0) || (idx_range_in.idx_b > data_x.Count))
            {
                Console.WriteLine("SLR: input range is out of bounds");
                return(false);
            }

            //  compute linear regression and approximation error for input range ;
            //  the error keeps its initial value double.MaxValue if the regression
            //  cannot be computed, so such a range is never treated as acceptable
            double err_range_in = double.MaxValue;

            ComputeLinearRegression(data_x, data_y, idx_range_in, lr_params_out, ref err_range_in);

            //  if the approximation is acceptable, input range is not subdivided
            if (err_range_in < devn_max_user)
            {
                return(false);
            }

            //  check for indivisible range : both parts of a split need
            //  at least the minimal range length
            if (range_len < 2 * end_offset)
            {
                return(false);
            }

            //  nothing to search in if there are no deviations at all
            if (vec_devns_in.Count == 0)
            {
                return(false);
            }

            //  for the main criterion of splitting here we use
            //  the greatest local maximum of deviations inside the given range
            int    idx_split_local_max = -1;
            double devn_max            = 0.0;

            //  find inside given range local maximum with the largest deviation
            foreach (int idx_max_cur in vec_max_ind_in)
            {
                //  the candidate must be at least end_offset positions after idx_a
                //  and more than end_offset positions before idx_b, so that
                //  a split at this position leaves enough data points on both sides
                if ((idx_max_cur < idx_range_in.idx_a + end_offset) ||
                    (idx_max_cur >= idx_range_in.idx_b - end_offset))
                {
                    continue;
                }

                //  strict comparison : the first of several equal maxima wins,
                //  and a maximum with zero deviation is never selected
                double devn_cur = vec_devns_in[idx_max_cur];
                if (devn_cur > devn_max)
                {
                    devn_max            = devn_cur;
                    idx_split_local_max = idx_max_cur;
                }
            }

            //  the case of no one local maximum inside the given range
            if (idx_split_local_max < 0)
            {
                return(false);
            }

            //  here idx_split_local_max >= idx_a + end_offset,
            //  so this is a valid split position inside the given range
            idx_split_out = idx_split_local_max;

            return(true);
        }
Example #2
0
        //  the function ComputeLinearRegression() computes parameters of
        //  linear regression and approximation error
        //  for a given range of a given dataset ;
        static public void ComputeLinearRegression
        (
            //  original dataset (x and y values read at the same indices)
            List <double> data_x,
            List <double> data_y,
            //  semi-open range of indices [ idx_a , idx_b ) to be approximated
            RangeIndex idx_range,
            //  receives the coefficients of the line  y = coef_a + coef_b * x
            LinearRegressionParams lin_regr_out,
            //  receives the approximation error : the maximum of absolute
            //  differences between original and approximated values ;
            //  it is left unchanged if the function fails
            ref double err_appr_out
        )
        {
            //  refuse ranges shorter than the minimal allowed length
            int n_points = idx_range.Length();
            if (n_points < RangeLengthMin())
            {
                Console.WriteLine("SLR error: input range is too small");
                return;
            }

            int    first       = idx_range.idx_a;
            int    last        = idx_range.idx_b;
            double n_as_double = n_points;

            //  first pass : accumulate the sums needed by the regression formulas
            double total_x  = 0.0;
            double total_y  = 0.0;
            double total_xx = 0.0;
            double total_xy = 0.0;

            for (int i = first; i < last; i++)
            {
                double x = data_x[i];
                double y = data_y[i];

                total_x  += x;
                total_y  += y;
                total_xx += x * x;
                total_xy += x * y;
            }

            //  derive the line parameters from the sums ;
            //  on failure (a very unusual case for real data) nothing else is
            //  done and err_appr_out keeps the value supplied by the caller
            bool fit_ok = LinearRegressionParameters(n_as_double, total_x, total_y, total_xx, total_xy, lin_regr_out);
            if (!fit_ok)
            {
                return;
            }

            double intercept = lin_regr_out.coef_a;
            double slope     = lin_regr_out.coef_b;

            //  second pass : measure how far the fitted line is from the data,
            //  keeping the largest absolute deviation found in the range
            double worst_devn = 0.0;

            for (int i = first; i < last; i++)
            {
                double fitted    = intercept + slope * data_x[i];
                double deviation = Math.Abs(data_y[i] - fitted);

                worst_devn = (deviation > worst_devn) ? deviation : worst_devn;
            }

            err_appr_out = worst_devn;
        }