/*************************************************************************
Constrained linear least squares fitting.

This is a variation of LSFitLinear(), which searches for min|A*x-b| given
that K additional constraints C*x=bc are satisfied. It reduces the original
task to a modified one: min|B*y-d| WITHOUT constraints, then LSFitLinear()
is called.

IMPORTANT: if you want to perform polynomial fitting, it may be more
           convenient to use the PolynomialFit() function. That function
           gives the best results on polynomial problems and solves the
           numerical stability issues which arise when you fit high-degree
           polynomials to your data.

INPUT PARAMETERS:
    Y       -   array[0..N-1] Function values in N points.
    FMatrix -   a table of basis functions values, array[0..N-1, 0..M-1].
                FMatrix[I,J] - value of J-th basis function in I-th point.
    CMatrix -   a table of constraints, array[0..K-1,0..M].
                I-th row of CMatrix corresponds to I-th linear constraint:
                CMatrix[I,0]*C[0] + ... + CMatrix[I,M-1]*C[M-1] = CMatrix[I,M]
    N       -   number of points used. N>=1.
    M       -   number of basis functions, M>=1.
    K       -   number of constraints, 0 <= K < M
                K=0 corresponds to absence of constraints.

OUTPUT PARAMETERS:
    Info    -   error code:
                * -4    internal SVD decomposition subroutine failed (very
                        rare and for degenerate systems only)
                * -3    either too many constraints (M or more),
                        degenerate constraints (some constraints are
                        repeated twice) or inconsistent constraints were
                        specified.
                *  1    task is solved
    C       -   decomposition coefficients, array[0..M-1]
    Rep     -   fitting report. Following fields are set:
                * R2            non-adjusted coefficient of determination
                                (non-weighted)
                * RMSError      rms error on the (X,Y).
                * AvgError      average error on the (X,Y).
                * AvgRelError   average relative error on the non-zero Y
                * MaxError      maximum error
                                NON-WEIGHTED ERRORS ARE CALCULATED

IMPORTANT:
    this subroutine doesn't calculate task's condition number for K<>0.

ERRORS IN PARAMETERS

This solver also calculates different kinds of errors in parameters and
fills corresponding fields of report:
* Rep.CovPar        covariance matrix for parameters, array[K,K].
* Rep.ErrPar        errors in parameters, array[K],
                    errpar = sqrt(diag(CovPar))
* Rep.ErrCurve      vector of fit errors - standard deviations of empirical
                    best-fit curve from "ideal" best-fit curve built with
                    infinite number of samples, array[N].
                    errcurve = sqrt(diag(F*CovPar*F')),
                    where F is functions matrix.
* Rep.Noise         vector of per-point estimates of noise, array[N]

IMPORTANT:  errors in parameters are calculated without taking into
            account boundary/linear constraints! Presence of constraints
            changes distribution of errors, but there is no easy way to
            account for constraints when you calculate covariance matrix.

NOTE:       noise in the data is estimated as follows:
            * for fitting without user-supplied weights all points are
              assumed to have same level of noise, which is estimated from
              the data
            * for fitting with user-supplied weights we assume that noise
              level in I-th point is inversely proportional to Ith weight.
              Coefficient of proportionality is estimated from the data.

NOTE:       we apply small amount of regularization when we invert squared
            Jacobian and calculate covariance matrix. It guarantees that
            algorithm won't divide by zero during inversion, but skews
            error estimates a bit (fractional error is about 10^-9).

            However, we believe that this difference is insignificant for
            all practical purposes except for the situation when you want
            to compare ALGLIB results with "reference" implementation up
            to the last significant digit.

NOTE:       covariance matrix is estimated using correction for degrees
            of freedom (covariances are divided by N-M instead of dividing
            by N).

  -- ALGLIB --
     Copyright 07.09.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitlinearc(double[] y,
    double[,] fmatrix,
    double[,] cmatrix,
    int n,
    int m,
    int k,
    ref int info,
    ref double[] c,
    lsfitreport rep)
{
    // Work on a private copy of Y and reset the outputs before validating.
    y = (double[])y.Clone();
    info = 0;
    c = new double[0];

    // Argument validation: sizes first, then finiteness of the used parts.
    alglib.ap.assert(n>=1, "LSFitLinearC: N<1!");
    alglib.ap.assert(m>=1, "LSFitLinearC: M<1!");
    alglib.ap.assert(k>=0, "LSFitLinearC: K<0!");
    alglib.ap.assert(alglib.ap.len(y)>=n, "LSFitLinearC: length(Y)<N!");
    alglib.ap.assert(apserv.isfinitevector(y, n), "LSFitLinearC: Y contains infinite or NaN values!");
    alglib.ap.assert(alglib.ap.rows(fmatrix)>=n, "LSFitLinearC: rows(FMatrix)<N!");
    alglib.ap.assert(alglib.ap.cols(fmatrix)>=m, "LSFitLinearC: cols(FMatrix)<M!");
    alglib.ap.assert(apserv.apservisfinitematrix(fmatrix, n, m), "LSFitLinearC: FMatrix contains infinite or NaN values!");
    alglib.ap.assert(alglib.ap.rows(cmatrix)>=k, "LSFitLinearC: rows(CMatrix)<K!");
    alglib.ap.assert(alglib.ap.cols(cmatrix)>=m+1 || k==0, "LSFitLinearC: cols(CMatrix)<M+1!");
    alglib.ap.assert(apserv.apservisfinitematrix(cmatrix, k, m+1), "LSFitLinearC: CMatrix contains infinite or NaN values!");

    // The unweighted problem is the weighted one with every weight equal to 1.
    double[] unitweights = new double[n];
    for(int p=0; p<n; p++)
    {
        unitweights[p] = 1;
    }
    lsfitlinearwc(y, unitweights, fmatrix, cmatrix, n, m, k, ref info, ref c, rep);
}
/*************************************************************************
Linear least squares fitting, without weights.

Builds a weight vector of N ones and forwards the task to the internal
fitting subroutine. Info is set to -1 (and nothing else is touched) when
N<1.

See LSFitLinearW for more information.

  -- ALGLIB --
     Copyright 17.08.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitlinear(ref double[] y,
    ref double[,] fmatrix,
    int n,
    int m,
    ref int info,
    ref double[] c,
    ref lsfitreport rep)
{
    // Nothing to fit: report incorrect parameters.
    if( n<1 )
    {
        info = -1;
        return;
    }

    // Unit weights turn the weighted solver into the unweighted one.
    double[] unitweights = new double[n];
    for(int p=0; p<n; p++)
    {
        unitweights[p] = 1;
    }
    lsfitlinearinternal(ref y, ref unitweights, ref fmatrix, n, m, ref info, ref c, ref rep);
}
/*************************************************************************
Convenience overload of lsfitlinearc: N, M and K are deduced from the
array sizes (N=len(Y), M=cols(FMatrix), K=rows(CMatrix)) and the call is
forwarded to lsfit.lsfitlinearc.

An alglibexception is thrown when len(Y)!=rows(FMatrix) or when
cols(FMatrix)!=cols(CMatrix)-1.
*************************************************************************/
public static void lsfitlinearc(double[] y, double[,] fmatrix, double[,] cmatrix, out int info, out double[] c, out lsfitreport rep)
{
    const string sizemismatch = "Error while calling 'lsfitlinearc': looks like one of arguments has wrong size";

    // Y must supply one value per row of the design matrix.
    if( ap.len(y)!=ap.rows(fmatrix) )
    {
        throw new alglibexception(sizemismatch);
    }

    // CMatrix carries one extra column (the right-hand side of a constraint).
    if( ap.cols(fmatrix)!=ap.cols(cmatrix)-1 )
    {
        throw new alglibexception(sizemismatch);
    }

    info = 0;
    c = new double[0];
    rep = new lsfitreport();

    int n = ap.len(y);
    int m = ap.cols(fmatrix);
    int k = ap.rows(cmatrix);
    lsfit.lsfitlinearc(y, fmatrix, cmatrix, n, m, k, ref info, ref c, rep.innerobj);
}
/*************************************************************************
Nonlinear least squares fitting results.

Called after LSFitNonlinearIteration() returned False.

INPUT PARAMETERS:
    State   -   algorithm state (used by LSFitNonlinearIteration).

OUTPUT PARAMETERS:
    Info    -   completion code (copied from State.RepTerminationType):
                    * -1    incorrect parameters were specified
                    *  1    relative function improvement is no more than
                            EpsF.
                    *  2    relative step is no more than EpsX.
                    *  4    gradient norm is no more than EpsG
                    *  5    MaxIts steps was taken
    C       -   array[0..K-1], solution. Written only when Info>0.
    Rep     -   optimization report. Following fields are set, and only
                when Info>0:
                * RMSError          rms error on the (X,Y).
                * AvgError          average error on the (X,Y).
                * AvgRelError       average relative error on the non-zero Y
                * MaxError          maximum error
                                    NON-WEIGHTED ERRORS ARE CALCULATED

  -- ALGLIB --
     Copyright 17.08.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitnonlinearresults(ref lsfitstate state,
    ref int info,
    ref double[] c,
    ref lsfitreport rep)
{
    info = state.repterminationtype;

    // On failure C and Rep are left exactly as the caller passed them.
    if( info<=0 )
    {
        return;
    }

    // Copy the first K coefficients of the solution out of the state.
    c = new double[state.k];
    for(int p=0; p<state.k; p++)
    {
        c[p] = state.c[p];
    }

    rep.rmserror = state.reprmserror;
    rep.avgerror = state.repavgerror;
    rep.avgrelerror = state.repavgrelerror;
    rep.maxerror = state.repmaxerror;
}
/*************************************************************************
Internal fitting subroutine

Solves the weighted linear least squares problem for the N x M design
matrix FMatrix, right part Y and per-row weights W.

OUTPUT:
    Info    -   -1 if N<1 or M<1 (nothing else is modified),
                -4 if the SVD subroutine reported failure,
                 1 otherwise.
    C       -   array[0..M-1], fitted coefficients.
    Rep     -   TaskRCond, RMSError, AvgError, AvgRelError and MaxError
                are written.

Outline:
    * N<M:  rows are scaled by W, an LQ decomposition reduces the task to
            an N x N one which is solved by a recursive call, and the
            result is mapped back through Q.
            NOTE: in this branch the error fields of Rep are the ones
            written by the recursive call (i.e. computed for the reduced
            N x N system); they are not recomputed for FMatrix/Y.
    * N>=M: rows are scaled by W, a QR decomposition reduces the task to
            an M x M upper triangular one; depending on the estimated
            reciprocal condition number it is solved by back substitution
            or through SVD with small singular values dropped. Errors are
            then computed from the unscaled FMatrix and Y.
*************************************************************************/
private static void lsfitlinearinternal(ref double[] y, ref double[] w, ref double[,] fmatrix, int n, int m, ref int info, ref double[] c, ref lsfitreport rep)
{
    double threshold = 0;
    double[,] ft = new double[0,0];
    double[,] q = new double[0,0];
    double[,] l = new double[0,0];
    double[,] r = new double[0,0];
    double[] b = new double[0];
    double[] wmod = new double[0];
    double[] tau = new double[0];
    int i = 0;
    int j = 0;
    double v = 0;
    double[] sv = new double[0];
    double[,] u = new double[0,0];
    double[,] vt = new double[0,0];
    double[] tmp = new double[0];
    double[] utb = new double[0];
    double[] sutb = new double[0];
    int relcnt = 0;
    int i_ = 0;

    // Reject empty problems before touching any output besides Info.
    if( n<1 | m<1 )
    {
        info = -1;
        return;
    }
    info = 1;

    // Conditioning threshold: sqrt(machine epsilon). Used both to choose the
    // solver and to decide which singular values are treated as zero.
    threshold = Math.Sqrt(AP.Math.MachineEpsilon);
    
    //
    // Degenerate case, needs special handling
    //
    if( n<m )
    {
        
        //
        // Create design matrix.
        // Rows of FT and entries of B are scaled by the weights, so the
        // recursive call below runs with unit weights (WMod).
        //
        ft = new double[n, m];
        b = new double[n];
        wmod = new double[n];
        for(j=0; j<=n-1; j++)
        {
            v = w[j];
            for(i_=0; i_<=m-1;i_++)
            {
                ft[j,i_] = v*fmatrix[j,i_];
            }
            b[j] = w[j]*y[j];
            wmod[j] = 1;
        }
        
        //
        // LQ decomposition and reduction to M=N
        //
        c = new double[m];
        for(i=0; i<=m-1; i++)
        {
            c[i] = 0;
        }
        rep.taskrcond = 0;
        ortfac.rmatrixlq(ref ft, n, m, ref tau);
        ortfac.rmatrixlqunpackq(ref ft, n, m, ref tau, n, ref q);
        ortfac.rmatrixlqunpackl(ref ft, n, m, ref l);

        // Solve the reduced N x N task; its solution is returned in TMP.
        lsfitlinearinternal(ref b, ref wmod, ref l, n, n, ref info, ref tmp, ref rep);
        if( info<=0 )
        {
            return;
        }

        // Map back to M coefficients: C[j] = sum over i of TMP[i]*Q[i,j].
        for(i=0; i<=n-1; i++)
        {
            v = tmp[i];
            for(i_=0; i_<=m-1;i_++)
            {
                c[i_] = c[i_] + v*q[i,i_];
            }
        }
        return;
    }
    
    //
    // N>=M. Generate design matrix and reduce to N=M using
    // QR decomposition.
    //
    ft = new double[n, m];
    b = new double[n];
    for(j=0; j<=n-1; j++)
    {
        v = w[j];
        for(i_=0; i_<=m-1;i_++)
        {
            ft[j,i_] = v*fmatrix[j,i_];
        }
        b[j] = w[j]*y[j];
    }
    ortfac.rmatrixqr(ref ft, n, m, ref tau);
    ortfac.rmatrixqrunpackq(ref ft, n, m, ref tau, m, ref q);
    ortfac.rmatrixqrunpackr(ref ft, n, m, ref r);

    // TMP[j] = sum over i of B[i]*Q[i,j]; it then replaces B as the
    // M-element right part of the reduced system.
    tmp = new double[m];
    for(i=0; i<=m-1; i++)
    {
        tmp[i] = 0;
    }
    for(i=0; i<=n-1; i++)
    {
        v = b[i];
        for(i_=0; i_<=m-1;i_++)
        {
            tmp[i_] = tmp[i_] + v*q[i,i_];
        }
    }
    b = new double[m];
    for(i_=0; i_<=m-1;i_++)
    {
        b[i_] = tmp[i_];
    }
    
    //
    // R contains reduced MxM design upper triangular matrix,
    // B contains reduced Mx1 right part.
    //
    // Determine system condition number and decide
    // should we use triangular solver (faster) or
    // SVD-based solver (more stable).
    //
    // We can use LU-based RCond estimator for this task.
    //
    rep.taskrcond = rcond.rmatrixlurcondinf(ref r, m);
    if( (double)(rep.taskrcond)>(double)(threshold) )
    {
        
        //
        // use QR-based solver
        // (back substitution from the last row of R upwards)
        //
        c = new double[m];
        c[m-1] = b[m-1]/r[m-1,m-1];
        for(i=m-2; i>=0; i--)
        {
            v = 0.0;
            for(i_=i+1; i_<=m-1;i_++)
            {
                v += r[i,i_]*c[i_];
            }
            c[i] = (b[i]-v)/r[i,i];
        }
    }
    else
    {
        
        //
        // use SVD-based solver
        //
        if( !svd.rmatrixsvd(r, m, m, 1, 1, 2, ref sv, ref u, ref vt) )
        {
            info = -4;
            return;
        }
        utb = new double[m];
        sutb = new double[m];
        for(i=0; i<=m-1; i++)
        {
            utb[i] = 0;
        }

        // UTB[j] = sum over i of B[i]*U[i,j].
        for(i=0; i<=m-1; i++)
        {
            v = b[i];
            for(i_=0; i_<=m-1;i_++)
            {
                utb[i_] = utb[i_] + v*u[i,i_];
            }
        }
        if( (double)(sv[0])>(double)(0) )
        {
            // TaskRCond is replaced by the ratio SV[M-1]/SV[0]; components
            // whose singular value is not above Threshold*SV[0] are zeroed
            // instead of being divided.
            rep.taskrcond = sv[m-1]/sv[0];
            for(i=0; i<=m-1; i++)
            {
                if( (double)(sv[i])>(double)(threshold*sv[0]) )
                {
                    sutb[i] = utb[i]/sv[i];
                }
                else
                {
                    sutb[i] = 0;
                }
            }
        }
        else
        {
            // SV[0] is not positive: every component is zeroed, so C ends
            // up as the zero vector.
            rep.taskrcond = 0;
            for(i=0; i<=m-1; i++)
            {
                sutb[i] = 0;
            }
        }
        c = new double[m];
        for(i=0; i<=m-1; i++)
        {
            c[i] = 0;
        }

        // C[j] = sum over i of SUTB[i]*VT[i,j].
        for(i=0; i<=m-1; i++)
        {
            v = sutb[i];
            for(i_=0; i_<=m-1;i_++)
            {
                c[i_] = c[i_] + v*vt[i,i_];
            }
        }
    }
    
    //
    // calculate errors
    // (uses the original, unscaled FMatrix and Y; relative error is
    // averaged over points with non-zero Y only)
    //
    rep.rmserror = 0;
    rep.avgerror = 0;
    rep.avgrelerror = 0;
    rep.maxerror = 0;
    relcnt = 0;
    for(i=0; i<=n-1; i++)
    {
        v = 0.0;
        for(i_=0; i_<=m-1;i_++)
        {
            v += fmatrix[i,i_]*c[i_];
        }
        rep.rmserror = rep.rmserror+AP.Math.Sqr(v-y[i]);
        rep.avgerror = rep.avgerror+Math.Abs(v-y[i]);
        if( (double)(y[i])!=(double)(0) )
        {
            rep.avgrelerror = rep.avgrelerror+Math.Abs(v-y[i])/Math.Abs(y[i]);
            relcnt = relcnt+1;
        }
        rep.maxerror = Math.Max(rep.maxerror, Math.Abs(v-y[i]));
    }
    rep.rmserror = Math.Sqrt(rep.rmserror/n);
    rep.avgerror = rep.avgerror/n;
    if( relcnt!=0 )
    {
        rep.avgrelerror = rep.avgrelerror/relcnt;
    }
}
/*************************************************************************
This is internal function for Chebyshev fitting.

It assumes that input data are normalized:
* X/XC belong to [-1,+1],
* mean(Y)=0, stddev(Y)=1.

It does not check inputs for errors.

This function is used to fit general (shifted) Chebyshev models, power
basis models or barycentric models.

INPUT PARAMETERS:
    X   -   points, array[0..N-1].
    Y   -   function values, array[0..N-1].
    W   -   weights, array[0..N-1]
    N   -   number of points, N>0.
    XC  -   points where polynomial values/derivatives are constrained,
            array[0..K-1].
    YC  -   values of constraints, array[0..K-1]
    DC  -   array[0..K-1], types of constraints:
            * DC[i]=0   means that P(XC[i])=YC[i]
            * DC[i]=1   means that P'(XC[i])=YC[i]
    K   -   number of constraints, 0<=K<M.
            K=0 means no constraints (XC/YC/DC are not used in such cases)
    M   -   number of basis functions (= polynomial_degree + 1), M>=1

OUTPUT PARAMETERS:
    Info-   same format as in LSFitLinearW() subroutine:
            * Info>0    task is solved
            * Info<=0   an error occurred:
                        -4 means inconvergence of internal SVD
                        -3 means inconsistent constraints
    C   -   interpolant in Chebyshev form; [-1,+1] is used as base interval
    Rep -   report, same format as in LSFitLinearW() subroutine.
            Following fields are set:
            * RMSError      rms error on the (X,Y).
            * AvgError      average error on the (X,Y).
            * AvgRelError   average relative error on the non-zero Y
            * MaxError      maximum error
                            NON-WEIGHTED ERRORS ARE CALCULATED

IMPORTANT:
    this subroutine doesn't calculate task's condition number for K<>0.

  -- ALGLIB PROJECT --
     Copyright 10.12.2009 by Bochkanov Sergey
*************************************************************************/
private static void internalchebyshevfit(double[] x,
    double[] y,
    double[] w,
    int n,
    double[] xc,
    double[] yc,
    int[] dc,
    int k,
    int m,
    ref int info,
    ref double[] c,
    lsfitreport rep)
{
    int row;
    int col;

    xc = (double[])xc.Clone();
    yc = (double[])yc.Clone();
    info = 0;
    c = new double[0];
    clearreport(rep);

    // Weight decay for correct handling of a task which becomes degenerate
    // after constraints are applied.
    double decay = 10000*math.machineepsilon;

    // Storage:
    // * yext/wext  - Y and W extended by M regularizing entries
    // * basis      - values of the basis functions at the current point
    // * basisdiff  - their derivatives at the current point
    // * fmatrix    - N data rows followed by M regularizing rows
    // * cmatrix    - K constraint rows, last column is the right part
    double[] yext = new double[n+m];
    double[] wext = new double[n+m];
    double[] basis = new double[m];
    double[] basisdiff = new double[m];
    double[,] fmatrix = new double[n+m, m];
    double[,] cmatrix = new double[0,0];
    if( k>0 )
    {
        cmatrix = new double[k, m+1];
    }

    // First N rows: basis functions at the data points, generated by the
    // recurrence T[0]=1, T[1]=x, T[j]=2*x*T[j-1]-T[j-2].
    for(row=0; row<n; row++)
    {
        for(col=0; col<m; col++)
        {
            double t;
            if( col>=2 )
            {
                t = 2*x[row]*basis[col-1]-basis[col-2];
            }
            else if( col==1 )
            {
                t = x[row];
            }
            else
            {
                t = 1;
            }
            basis[col] = t;
            fmatrix[row,col] = t;
        }
    }

    // Next M rows: diagonal decay (regularizing) term.
    for(row=0; row<m; row++)
    {
        for(col=0; col<m; col++)
        {
            fmatrix[n+row,col] = (row==col) ? decay : 0;
        }
    }

    // Extended Y/W: the original N entries, then M zeros in Y and M copies
    // of mean(abs(W)) in W.
    double meanabsw = 0;
    for(row=0; row<n; row++)
    {
        yext[row] = y[row];
        wext[row] = w[row];
        meanabsw = meanabsw+Math.Abs(w[row]);
    }
    meanabsw = meanabsw/n;
    for(row=0; row<m; row++)
    {
        yext[n+row] = 0;
        wext[n+row] = meanabsw;
    }

    // Constraint matrix: each row holds either the basis values (DC=0) or
    // the basis derivatives (DC=1) at XC[row], followed by YC[row].
    for(row=0; row<k; row++)
    {
        for(col=0; col<m; col++)
        {
            if( col>=2 )
            {
                basis[col] = 2*xc[row]*basis[col-1]-basis[col-2];
                basisdiff[col] = 2*(basis[col-1]+xc[row]*basisdiff[col-1])-basisdiff[col-2];
            }
            else if( col==1 )
            {
                basis[col] = xc[row];
                basisdiff[col] = 1;
            }
            else
            {
                basis[col] = 1;
                basisdiff[col] = 0;
            }
        }

        // Any other DC value leaves the coefficient part of the row untouched.
        double[] source = null;
        if( dc[row]==0 )
        {
            source = basis;
        }
        else if( dc[row]==1 )
        {
            source = basisdiff;
        }
        if( source!=null )
        {
            for(col=0; col<m; col++)
            {
                cmatrix[row,col] = source[col];
            }
        }
        cmatrix[row,m] = yc[row];
    }

    // Constrained task is solved on the extended (regularized) system; the
    // unconstrained one needs no regularization and uses the first N rows.
    if( k>0 )
    {
        lsfitlinearwc(yext, wext, fmatrix, cmatrix, n+m, m, k, ref info, ref c, rep);
    }
    else
    {
        lsfitlinearwc(y, w, fmatrix, cmatrix, n, m, 0, ref info, ref c, rep);
    }
}
/*************************************************************************
Resets every field of the fitting report to its "empty" value: scalar
fields become zero (VarIdx becomes -1) and array fields become
zero-length arrays.
*************************************************************************/
private static void clearreport(lsfitreport rep)
{
    // Error metrics.
    rep.rmserror = 0;
    rep.wrmserror = 0;
    rep.avgerror = 0;
    rep.avgrelerror = 0;
    rep.maxerror = 0;
    rep.r2 = 0;

    // Solver diagnostics.
    rep.taskrcond = 0;
    rep.iterationscount = 0;
    rep.varidx = -1;

    // Covariance / error-in-parameters outputs.
    rep.covpar = new double[0, 0];
    rep.errpar = new double[0];
    rep.errcurve = new double[0];
    rep.noise = new double[0];
}
/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.

In this build it simply forwards all arguments to lsfitlinearc().
*************************************************************************/
public static void _pexec_lsfitlinearc(double[] y,
    double[,] fmatrix,
    double[,] cmatrix,
    int n,
    int m,
    int k,
    ref int info,
    ref double[] c,
    lsfitreport rep)
{
    lsfitlinearc(y, fmatrix, cmatrix, n, m, k, ref info, ref c, rep);
}
/*************************************************************************
Nonlinear least squares fitting results.

Called after return from LSFitFit().

INPUT PARAMETERS:
    State   -   algorithm state

OUTPUT PARAMETERS:
    Info    -   completion code (copied from State.RepTerminationType):
                    * -7    gradient verification failed.
                            See LSFitSetGradientCheck() for more information.
                    *  1    relative function improvement is no more than
                            EpsF.
                    *  2    relative step is no more than EpsX.
                    *  4    gradient norm is no more than EpsG
                    *  5    MaxIts steps was taken
                    *  7    stopping conditions are too stringent,
                            further improvement is impossible
    C       -   array[0..K-1], solution. Zero-length array when Info<=0.
    Rep     -   optimization report. VarIdx is always copied from the
                state; the following fields are set only when Info>0:
                * RMSError          rms error on the (X,Y).
                * AvgError          average error on the (X,Y).
                * AvgRelError       average relative error on the non-zero Y
                * MaxError          maximum error
                                    NON-WEIGHTED ERRORS ARE CALCULATED
                * WRMSError         weighted rms error on the (X,Y).
                * IterationsCount   number of iterations reported by the
                                    state

  -- ALGLIB --
     Copyright 17.08.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitresults(lsfitstate state,
    ref int info,
    ref double[] c,
    lsfitreport rep)
{
    // Reset the outputs first, then publish the termination code.
    info = 0;
    c = new double[0];
    info = state.repterminationtype;
    rep.varidx = state.repvaridx;

    // Unsuccessful termination: no solution and no error metrics to report.
    if( info<=0 )
    {
        return;
    }

    // Copy the first K coefficients of the solution out of the state.
    c = new double[state.k];
    for(int p=0; p<state.k; p++)
    {
        c[p] = state.c[p];
    }

    rep.rmserror = state.reprmserror;
    rep.wrmserror = state.repwrmserror;
    rep.avgerror = state.repavgerror;
    rep.avgrelerror = state.repavgrelerror;
    rep.maxerror = state.repmaxerror;
    rep.iterationscount = state.repiterationscount;
}
/*************************************************************************
This is "expert" 4PL/5PL fitting function, which can be used if you need
better control over fitting process than provided by LogisticFit4() or
LogisticFit5().

This function fits model of the form

    F(x|A,B,C,D)   = D+(A-D)/(1+Power(x/C,B))           (4PL model)

or

    F(x|A,B,C,D,G) = D+(A-D)/Power(1+Power(x/C,B),G)    (5PL model)

Here:
    * A, D - unconstrained
    * B>=0 for 4PL, unconstrained for 5PL
    * C>0
    * G>0 (if present)

INPUT PARAMETERS:
    X       -   array[N], stores X-values.
                MUST include only non-negative numbers (but may include
                zero values). Can be unsorted.
    Y       -   array[N], values to fit.
    N       -   number of points. If N is less than length of X/Y, only
                leading N elements are used.
    CnstrLeft-  optional equality constraint for model value at the left
                boundary (at X=0). Specify NAN (Not-a-Number) if you do
                not need constraint on the model value at X=0 (in C++ you
                can pass alglib::fp_nan as parameter, in C# it will be
                Double.NaN).
                See below, section "EQUALITY CONSTRAINTS" for more
                information about constraints.
    CnstrRight- optional equality constraint for model value at
                X=infinity. Specify NAN (Not-a-Number) if you do not need
                constraint on the model value (in C++ you can pass
                alglib::fp_nan as parameter, in C# it will be Double.NaN).
                See below, section "EQUALITY CONSTRAINTS" for more
                information about constraints.
    Is4PL   -   whether 4PL or 5PL models are fitted
    LambdaV -   regularization coefficient, LambdaV>=0.
                Set it to zero unless you know what you are doing.
    EpsX    -   stopping condition (step size), EpsX>=0.
                Zero value means that small step is automatically chosen
                (1.0E-10 is substituted).
                See notes below for more information.
    RsCnt   -   number of repeated restarts from random points. 4PL/5PL
                models are prone to problem of bad local extrema.
                Utilizing multiple random restarts allows us to improve
                algorithm convergence.
                RsCnt>=0.
                Zero value means that function automatically chooses a
                small amount of restarts (4 restarts are used).

OUTPUT PARAMETERS:
    A, B, C, D- parameters of 4PL model
    G       -   parameter of 5PL model; for Is4PL=True, G=1 is returned.
    Rep     -   fitting report. This structure has many fields, but ONLY
                ONES LISTED BELOW ARE SET:
                * Rep.IterationsCount - number of iterations performed
                * Rep.RMSError - root-mean-square error
                * Rep.AvgError - average absolute error
                * Rep.AvgRelError - average relative error (calculated for
                  non-zero Y-values)
                * Rep.MaxError - maximum absolute error
                * Rep.R2 - coefficient of determination, R-squared. This
                  coefficient is calculated as R2=1-RSS/TSS (in case
                  of nonlinear regression there are multiple ways to
                  define R2, each of them giving different results).

NOTE: after you obtained coefficients, you can evaluate model with
      LogisticCalc5() function.

NOTE: step is automatically scaled according to scale of parameters being
      fitted before we compare its length with EpsX. Thus, this function
      can be used to fit data with very small or very large values without
      changing EpsX.

EQUALITY CONSTRAINTS ON PARAMETERS

4PL/5PL solver supports equality constraints on model values at the left
boundary (X=0) and right boundary (X=infinity). These constraints are
completely optional and you can specify both of them, only one - or no
constraints at all.

Parameter CnstrLeft contains left constraint (or NAN for unconstrained
fitting), and CnstrRight contains right one. For 4PL, left constraint
ALWAYS corresponds to parameter A, and right one is ALWAYS constraint on
D. That's because 4PL model is normalized in such way that B>=0.

For 5PL model things are different. Unlike 4PL one, 5PL model is NOT
symmetric with respect to change in sign of B. Thus, negative B's are
possible, and left constraint may constrain parameter A (for positive B's)
- or parameter D (for negative B's). Similarly changes meaning of right
constraint.

You do not have to decide what parameter to constrain - algorithm will
automatically determine correct parameters as fitting progresses. However,
question highlighted above is important when you interpret fitting results.

  -- ALGLIB PROJECT --
     Copyright 14.02.2014 by Bochkanov Sergey
*************************************************************************/
public static void logisticfit45x(double[] x,
    double[] y,
    int n,
    double cnstrleft,
    double cnstrright,
    bool is4pl,
    double lambdav,
    double epsx,
    int rscnt,
    ref double a,
    ref double b,
    ref double c,
    ref double d,
    ref double g,
    lsfitreport rep)
{
    int i = 0;
    int k = 0;
    int innerit = 0;
    int outerit = 0;
    int nz = 0;
    double v = 0;
    double b00 = 0;
    double b01 = 0;
    double b10 = 0;
    double b11 = 0;
    double b30 = 0;
    double b31 = 0;
    double[] p0 = new double[0];
    double[] p1 = new double[0];
    double[] p2 = new double[0];
    double[] bndl = new double[0];
    double[] bndu = new double[0];
    double[] s = new double[0];
    double[,] z = new double[0,0];
    hqrnd.hqrndstate rs = new hqrnd.hqrndstate();
    minlm.minlmstate state = new minlm.minlmstate();
    minlm.minlmreport replm = new minlm.minlmreport();
    int maxits = 0;
    double fbest = 0;
    double flast = 0;
    double flast2 = 0;
    double scalex = 0;
    double scaley = 0;
    double[] bufx = new double[0];
    double[] bufy = new double[0];
    double rss = 0;
    double tss = 0;
    double meany = 0;

    // X and Y are sorted below, so work on private copies.
    x = (double[])x.Clone();
    y = (double[])y.Clone();
    a = 0;
    b = 0;
    c = 0;
    d = 0;
    g = 0;

    alglib.ap.assert(math.isfinite(epsx), "LogisticFitX: EpsX is infinite/NAN");
    alglib.ap.assert(math.isfinite(lambdav), "LogisticFitX: LambdaV is infinite/NAN");
    alglib.ap.assert(math.isfinite(cnstrleft) || Double.IsNaN(cnstrleft), "LogisticFitX: CnstrLeft is NOT finite or NAN");
    alglib.ap.assert(math.isfinite(cnstrright) || Double.IsNaN(cnstrright), "LogisticFitX: CnstrRight is NOT finite or NAN");
    alglib.ap.assert((double)(lambdav)>=(double)(0), "LogisticFitX: negative LambdaV");
    alglib.ap.assert(n>0, "LogisticFitX: N<=0");
    alglib.ap.assert(rscnt>=0, "LogisticFitX: RsCnt<0");
    alglib.ap.assert((double)(epsx)>=(double)(0), "LogisticFitX: EpsX<0");
    alglib.ap.assert(alglib.ap.len(x)>=n, "LogisticFitX: Length(X)<N");
    alglib.ap.assert(alglib.ap.len(y)>=n, "LogisticFitX: Length(Y)<N");
    alglib.ap.assert(apserv.isfinitevector(x, n), "LogisticFitX: X contains infinite/NAN values");
    // This check is about Y, so the message must name Y (it used to say X).
    alglib.ap.assert(apserv.isfinitevector(y, n), "LogisticFitX: Y contains infinite/NAN values");

    // Fixed seed: restarts are pseudo-random but reproducible.
    hqrnd.hqrndseed(2211, 1033044, rs);
    clearreport(rep);
    if( (double)(epsx)==(double)(0) )
    {
        epsx = 1.0E-10;
    }
    if( rscnt==0 )
    {
        rscnt = 4;
    }
    maxits = 1000;
    
    //
    // Sort points by X.
    // Determine number of zero and non-zero values.
    //
    tsort.tagsortfastr(ref x, ref y, ref bufx, ref bufy, n);
    alglib.ap.assert((double)(x[0])>=(double)(0), "LogisticFitX: some X[] are negative");
    nz = n;
    for(i=0; i<=n-1; i++)
    {
        if( (double)(x[i])>(double)(0) )
        {
            nz = i;
            break;
        }
    }
    
    //
    // For NZ=N (all X[] are zero) special code is used.
    // For NZ<N we use general-purpose code.
    //
    rep.iterationscount = 0;
    if( nz==n )
    {
        
        //
        // NZ=N, degenerate problem.
        // No need to run optimizer.
        //
        v = 0.0;
        for(i=0; i<=n-1; i++)
        {
            v = v+y[i];
        }
        v = v/n;
        if( math.isfinite(cnstrleft) )
        {
            a = cnstrleft;
        }
        else
        {
            a = v;
        }
        b = 1;
        c = 1;
        if( math.isfinite(cnstrright) )
        {
            d = cnstrright;
        }
        else
        {
            d = a;
        }
        g = 1;
    }
    else
    {
        
        //
        // Non-degenerate problem.
        // Determine scale of data.
        //
        scalex = x[nz+(n-nz)/2];
        alglib.ap.assert((double)(scalex)>(double)(0), "LogisticFitX: internal error");
        v = 0.0;
        for(i=0; i<=n-1; i++)
        {
            v = v+y[i];
        }
        v = v/n;
        scaley = 0.0;
        for(i=0; i<=n-1; i++)
        {
            scaley = scaley+math.sqr(y[i]-v);
        }
        scaley = Math.Sqrt(scaley/n);
        if( (double)(scaley)==(double)(0) )
        {
            scaley = 1.0;
        }
        s = new double[5];
        s[0] = scaley;
        s[1] = 0.1;
        s[2] = scalex;
        s[3] = scaley;
        s[4] = 0.1;
        p0 = new double[5];
        p0[0] = 0;
        p0[1] = 0;
        p0[2] = 0;
        p0[3] = 0;
        p0[4] = 0;
        bndl = new double[5];
        bndu = new double[5];
        minlm.minlmcreatevj(5, n+5, p0, state);
        minlm.minlmsetscale(state, s);
        minlm.minlmsetcond(state, 0.0, 0.0, epsx, maxits);
        minlm.minlmsetxrep(state, true);
        
        //
        // Main loop - includes THREE (!) nested iterations:
        //
        // 1. Inner iteration is minimization of target function from
        //    the current initial point P1 subject to boundary constraints
        //    given by arrays BndL and BndU.
        //
        // 2. Middle iteration changes boundary constraints from tight to
        //    relaxed ones:
        //    * at the first middle iteration we optimize with "tight"
        //      constraints on parameters B and C (P[1] and P[2]). It
        //      allows us to find good initial point for the next middle
        //      iteration without risk of running into "hard" points (B=0, C=0).
        //      Initial point is initialized by outer iteration.
        //      Solution is placed to P1.
        //    * at the second middle iteration we relax boundary constraints
        //      on B and C. Solution P1 from the first middle iteration is
        //      used as initial point for the second one.
        //    * both first and second iterations are 4PL models, even when
        //      we fit 5PL.
        //    * additionally, for 5PL models, we use results from the second
        //      middle iteration as initial guess for 5PL fit.
        //    * after middle iteration is over we compare quality of the
        //      solution stored in P1 and offload it to A/B/C/D/G, if it
        //      is better.
        //
        // 3. Outer iteration (starts below) changes following parameters:
        //    * initial point
        //    * "tight" constraints BndL/BndU
        //    * "relaxed" constraints BndL/BndU
        //
        // Below we prepare combined matrix Z of optimization settings for
        // outer/middle iterations:
        //
        //     [ P00 BndL00 BndU00 BndL01 BndU01 ]
        //     [                                 ]
        //     [ P10 BndL10 BndU10 BndL11 BndU11 ]
        //
        // Here:
        // * Pi0 is initial point for I-th outer iteration
        // * BndLij is lower boundary for I-th outer iteration, J-th inner iteration
        // * BndUij - same as BndLij
        //
        // Column layout of one row of Z (5 values per group):
        //   0..4   initial point,
        //   5..9   tight lower bounds,   10..14 tight upper bounds,
        //   15..19 relaxed lower bounds, 20..24 relaxed upper bounds.
        //
        z = new double[rscnt, 5+4*5];
        for(i=0; i<=rscnt-1; i++)
        {
            if( math.isfinite(cnstrleft) )
            {
                z[i,0] = cnstrleft;
            }
            else
            {
                z[i,0] = y[0]+0.25*scaley*(hqrnd.hqrnduniformr(rs)-0.5);
            }
            z[i,1] = 0.5+hqrnd.hqrnduniformr(rs);
            z[i,2] = x[nz+hqrnd.hqrnduniformi(rs, n-nz)];
            if( math.isfinite(cnstrright) )
            {
                z[i,3] = cnstrright;
            }
            else
            {
                z[i,3] = y[n-1]+0.25*scaley*(hqrnd.hqrnduniformr(rs)-0.5);
            }
            z[i,4] = 1.0;
            if( math.isfinite(cnstrleft) )
            {
                z[i,5+0] = cnstrleft;
                z[i,10+0] = cnstrleft;
            }
            else
            {
                z[i,5+0] = Double.NegativeInfinity;
                z[i,10+0] = Double.PositiveInfinity;
            }
            z[i,5+1] = 0.5;
            z[i,10+1] = 2.0;
            z[i,5+2] = 0.5*scalex;
            z[i,10+2] = 2.0*scalex;
            if( math.isfinite(cnstrright) )
            {
                z[i,5+3] = cnstrright;
                z[i,10+3] = cnstrright;
            }
            else
            {
                z[i,5+3] = Double.NegativeInfinity;
                z[i,10+3] = Double.PositiveInfinity;
            }
            z[i,5+4] = 1.0;
            z[i,10+4] = 1.0;
            if( math.isfinite(cnstrleft) )
            {
                z[i,15+0] = cnstrleft;
                z[i,20+0] = cnstrleft;
            }
            else
            {
                z[i,15+0] = Double.NegativeInfinity;
                z[i,20+0] = Double.PositiveInfinity;
            }
            z[i,15+1] = 0.01;
            z[i,20+1] = Double.PositiveInfinity;
            z[i,15+2] = math.machineepsilon*scalex;
            z[i,20+2] = Double.PositiveInfinity;
            if( math.isfinite(cnstrright) )
            {
                z[i,15+3] = cnstrright;
                z[i,20+3] = cnstrright;
            }
            else
            {
                z[i,15+3] = Double.NegativeInfinity;
                z[i,20+3] = Double.PositiveInfinity;
            }
            z[i,15+4] = 1.0;
            z[i,20+4] = 1.0;
        }
        
        //
        // Run outer iterations
        //
        a = 0;
        b = 1;
        c = 1;
        d = 1;
        g = 1;
        fbest = math.maxrealnumber;
        p1 = new double[5];
        p2 = new double[5];
        for(outerit=0; outerit<=alglib.ap.rows(z)-1; outerit++)
        {
            
            //
            // Beginning of the middle iterations.
            // Prepare initial point P1.
            //
            for(i=0; i<=4; i++)
            {
                p1[i] = z[outerit,i];
            }
            flast = math.maxrealnumber;
            for(innerit=0; innerit<=1; innerit++)
            {
                
                //
                // Set current boundary constraints.
                // Run inner iteration.
                //
                for(i=0; i<=4; i++)
                {
                    bndl[i] = z[outerit,5+innerit*10+0+i];
                    bndu[i] = z[outerit,5+innerit*10+5+i];
                }
                minlm.minlmsetbc(state, bndl, bndu);
                logisticfitinternal(x, y, n, true, lambdav, state, replm, ref p1, ref flast);
                rep.iterationscount = rep.iterationscount+replm.iterationscount;
            }
            
            //
            // Middle iteration: try to fit with 5-parameter logistic model (if needed).
            //
            // We perform two attempts to fit: one with B>0, another one with B<0.
            // For PL4, these are equivalent up to transposition of A/D, but for 5PL
            // sign of B is very important.
            //
            // NOTE: results of 4PL fit are used as initial point for 5PL.
            //
            if( !is4pl )
            {
                
                //
                // Loosen constraints on G,
                // save constraints on A/B/D to B0/B1
                //
                bndl[4] = 0.1;
                bndu[4] = 10.0;
                b00 = bndl[0];
                b01 = bndu[0];
                b10 = bndl[1];
                b11 = bndu[1];
                b30 = bndl[3];
                b31 = bndu[3];
                
                //
                // First attempt: fitting with positive B
                //
                p2[0] = p1[0];
                p2[1] = p1[1];
                p2[2] = p1[2];
                p2[3] = p1[3];
                p2[4] = p1[4];
                bndl[0] = b00;
                bndu[0] = b01;
                bndl[1] = b10;
                bndu[1] = b11;
                bndl[3] = b30;
                bndu[3] = b31;
                minlm.minlmsetbc(state, bndl, bndu);
                logisticfitinternal(x, y, n, false, lambdav, state, replm, ref p2, ref flast2);
                rep.iterationscount = rep.iterationscount+replm.iterationscount;
                if( (double)(flast2)<(double)(flast) )
                {
                    for(i=0; i<=4; i++)
                    {
                        p1[i] = p2[i];
                    }
                    flast = flast2;
                }
                
                //
                // Second attempt: fitting with negative B.
                // A and D (and their bounds) are swapped, B and its bounds
                // are negated.
                //
                p2[0] = p1[3];
                p2[1] = -p1[1];
                p2[2] = p1[2];
                p2[3] = p1[0];
                p2[4] = p1[4];
                bndl[0] = b30;
                bndu[0] = b31;
                bndl[1] = -b11;
                bndu[1] = -b10;
                bndl[3] = b00;
                bndu[3] = b01;
                minlm.minlmsetbc(state, bndl, bndu);
                logisticfitinternal(x, y, n, false, lambdav, state, replm, ref p2, ref flast2);
                rep.iterationscount = rep.iterationscount+replm.iterationscount;
                if( (double)(flast2)<(double)(flast) )
                {
                    for(i=0; i<=4; i++)
                    {
                        p1[i] = p2[i];
                    }
                    flast = flast2;
                }
            }
            
            //
            // Middle iteration is done, compare its results with best value
            // found so far.
            //
            if( (double)(flast)<(double)(fbest) )
            {
                a = p1[0];
                b = p1[1];
                c = p1[2];
                d = p1[3];
                g = p1[4];
                fbest = flast;
            }
        }
    }
    
    //
    // Calculate errors
    //
    rep.rmserror = 0;
    rep.avgerror = 0;
    rep.avgrelerror = 0;
    rep.maxerror = 0;
    k = 0;
    rss = 0.0;
    tss = 0.0;
    meany = 0.0;
    for(i=0; i<=n-1; i++)
    {
        meany = meany+y[i];
    }
    meany = meany/n;
    for(i=0; i<=n-1; i++)
    {
        
        //
        // Calculate residual from regression.
        // At X=0 the model value is A for B>=0 and D for B<0.
        //
        if( (double)(x[i])>(double)(0) )
        {
            v = d+(a-d)/Math.Pow(1.0+Math.Pow(x[i]/c, b), g)-y[i];
        }
        else
        {
            if( (double)(b)>=(double)(0) )
            {
                v = a-y[i];
            }
            else
            {
                v = d-y[i];
            }
        }
        
        //
        // Update RSS (residual sum of squares) and TSS (total sum of squares)
        // which are used to calculate coefficient of determination.
        //
        // NOTE: we use formula R2 = 1-RSS/TSS because it has nice property of
        //       being equal to 1.0 if and only if model perfectly fits data.
        //
        //       When we fit nonlinear models, there exist multiple ways of
        //       determining R2, each of them giving different results. Formula
        //       above is the most intuitive one.
        //
        rss = rss+v*v;
        tss = tss+math.sqr(y[i]-meany);
        
        //
        // Update errors
        //
        rep.rmserror = rep.rmserror+math.sqr(v);
        rep.avgerror = rep.avgerror+Math.Abs(v);
        if( (double)(y[i])!=(double)(0) )
        {
            rep.avgrelerror = rep.avgrelerror+Math.Abs(v/y[i]);
            k = k+1;
        }
        rep.maxerror = Math.Max(rep.maxerror, Math.Abs(v));
    }
    rep.rmserror = Math.Sqrt(rep.rmserror/n);
    rep.avgerror = rep.avgerror/n;
    if( k>0 )
    {
        rep.avgrelerror = rep.avgrelerror/k;
    }

    // NOTE(review): when all Y values are equal TSS is zero, so this division
    // produces NaN or -Infinity; behavior is kept as is.
    rep.r2 = 1.0-rss/tss;
}
/*************************************************************************
Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.

This version simply forwards all of its arguments, unchanged and in the same
order, to lsfitlinearw(); Info, C and Rep are filled by that call.
*************************************************************************/
public static void _pexec_lsfitlinearw(double[] y,
    double[] w,
    double[,] fmatrix,
    int n,
    int m,
    ref int info,
    ref double[] c,
    lsfitreport rep)
{
    lsfitlinearw(y,w,fmatrix,n,m,ref info,ref c,rep);
}
/*************************************************************************
Five-parameter logistic (5PL) fitting with optional equality constraints on
the model values at the left boundary (X=0) and at X=infinity.

The model being fitted is

    F(x|A,B,C,D,G) = D+(A-D)/Power(1+Power(x/C,B),G)

where
    * A, D - may be fixed through the optional equality constraints
    * B    - unconstrained
    * C>0
    * G>0

IMPORTANT: in contrast to 4PL fitting, the B returned by this function is
           NOT guaranteed to be positive. The 5PL model is NOT symmetric
           with respect to B, so a fitted model can NOT be converted into an
           equivalent one having B of the desired sign (>0 or <0).

ALGORITHM OUTLINE

    * a small number of restarts from partially random locations is
      performed in order to avoid bad local extrema. Input data are used to
      build a good initial guess, and a controlled amount of randomness is
      added on top of it.
    * first Levenberg-Marquardt round: very tight constraints on B and C,
      G fixed at G=1. It gives a good initial guess for the second stage
      without the risk of running into a "flat spot".
    * second Levenberg-Marquardt round: no excessive constraints on B and C,
      G still equal to 1. It starts from the result of the first round.
    * third Levenberg-Marquardt round: constraints on G are relaxed and two
      models are tried - one with B>0 and one with B<0.
    * the result is compared with the best solution found so far, the best
      solution is overwritten if needed, and the next random location is
      processed.

The overall algorithm is very stable and is not prone to bad local extrema.
It also scales automatically when input data have a very large or a very
small range.

INPUT PARAMETERS:
    X           -   array[N], X-values. MUST contain only non-negative
                    numbers (zeros are allowed). May be unsorted.
    Y           -   array[N], values to fit.
    N           -   number of points. When N is less than the length of X/Y,
                    only the leading N elements are used.
    CnstrLeft   -   optional equality constraint for the model value at the
                    left boundary (X=0). Pass NAN if no constraint is needed
                    (alglib::fp_nan in C++, Double.NaN in C#). See the
                    "EQUALITY CONSTRAINTS" section below.
    CnstrRight  -   optional equality constraint for the model value at
                    X=infinity. Pass NAN if no constraint is needed
                    (alglib::fp_nan in C++, Double.NaN in C#). See the
                    "EQUALITY CONSTRAINTS" section below.

OUTPUT PARAMETERS:
    A,B,C,D,G   -   parameters of the 5PL model
    Rep         -   fitting report. The structure has many fields, but ONLY
                    THE ONES LISTED BELOW ARE SET:
                    * Rep.IterationsCount - number of iterations performed
                    * Rep.RMSError        - root-mean-square error
                    * Rep.AvgError        - average absolute error
                    * Rep.AvgRelError     - average relative error
                                            (computed over non-zero Y-values)
                    * Rep.MaxError        - maximum absolute error
                    * Rep.R2              - coefficient of determination,
                                            R-squared, computed as
                                            R2=1-RSS/TSS (for nonlinear
                                            regression there are several
                                            ways to define R2, and they give
                                            different results).

NOTE: once the coefficients are known, the model can be evaluated with
      LogisticCalc5().

NOTE: use LogisticFit45X() if you need more control over the fitting
      process than this function provides.

NOTE: the step is rescaled according to the scale of the parameters being
      fitted before its length is compared with EpsX, so data with very
      small or very large values can be fitted without changing EpsX.

EQUALITY CONSTRAINTS ON PARAMETERS

The 5PL solver supports equality constraints on the model values at the left
boundary (X=0) and at the right boundary (X=infinity). Both are optional:
you may specify both, just one, or none. CnstrLeft holds the left constraint
and CnstrRight the right one (NAN means "unconstrained").

Unlike the 4PL model, the 5PL model is NOT symmetric with respect to the
sign of B. Negative B's are therefore possible, and the left constraint may
fix parameter A (for positive B) or parameter D (for negative B); the
meaning of the right constraint changes accordingly.

You do not have to choose which parameter is constrained - the algorithm
determines it automatically as fitting progresses. Keep this in mind,
however, when you interpret the fitting results.

  -- ALGLIB PROJECT --
     Copyright 14.02.2014 by Bochkanov Sergey
*************************************************************************/
public static void logisticfit5ec(double[] x,
    double[] y,
    int n,
    double cnstrleft,
    double cnstrright,
    ref double a,
    ref double b,
    ref double c,
    ref double d,
    ref double g,
    lsfitreport rep)
{
    //
    // The worker receives private copies of the inputs, so the caller's
    // arrays are never handed to it directly.
    //
    double[] xcopy = (double[])x.Clone();
    double[] ycopy = (double[])y.Clone();

    //
    // Outputs start from a known (zero) state
    //
    a = b = c = d = g = 0;

    //
    // Delegate to the generic 4PL/5PL driver. The literal arguments
    // (false, 0.0, 0.0, 0) are the ones this wrapper always uses; see
    // LogisticFit45X() for their meaning.
    //
    logisticfit45x(xcopy, ycopy, n, cnstrleft, cnstrright, false, 0.0, 0.0, 0, ref a, ref b, ref c, ref d, ref g, rep);
}
/*************************************************************************
Unconstrained five-parameter logistic (5PL) fitting.

The model being fitted is

    F(x|A,B,C,D,G) = D+(A-D)/Power(1+Power(x/C,B),G)

where
    * A, D - unconstrained
    * B    - unconstrained
    * C>0
    * G>0

IMPORTANT: in contrast to 4PL fitting, the B returned by this function is
           NOT guaranteed to be positive. The 5PL model is NOT symmetric
           with respect to B, so a fitted model can NOT be converted into an
           equivalent one having B of the desired sign (>0 or <0).

ALGORITHM OUTLINE

    * a small number of restarts from partially random locations is
      performed in order to avoid bad local extrema. Input data are used to
      build a good initial guess, and a controlled amount of randomness is
      added on top of it.
    * first Levenberg-Marquardt round: very tight constraints on B and C,
      G fixed at G=1. It gives a good initial guess for the second stage
      without the risk of running into a "flat spot".
    * second Levenberg-Marquardt round: no excessive constraints on B and C,
      G still equal to 1. It starts from the result of the first round.
    * third Levenberg-Marquardt round: constraints on G are relaxed and two
      models are tried - one with B>0 and one with B<0.
    * the result is compared with the best solution found so far, the best
      solution is overwritten if needed, and the next random location is
      processed.

The overall algorithm is very stable and is not prone to bad local extrema.
It also scales automatically when input data have a very large or a very
small range.

INPUT PARAMETERS:
    X       -   array[N], X-values. MUST contain only non-negative numbers
                (zeros are allowed). May be unsorted.
    Y       -   array[N], values to fit.
    N       -   number of points. When N is less than the length of X/Y,
                only the leading N elements are used.

OUTPUT PARAMETERS:
    A,B,C,D,G-  parameters of the 5PL model
    Rep     -   fitting report. The structure has many fields, but ONLY THE
                ONES LISTED BELOW ARE SET:
                * Rep.IterationsCount - number of iterations performed
                * Rep.RMSError        - root-mean-square error
                * Rep.AvgError        - average absolute error
                * Rep.AvgRelError     - average relative error (computed
                                        over non-zero Y-values)
                * Rep.MaxError        - maximum absolute error
                * Rep.R2              - coefficient of determination,
                                        R-squared, computed as R2=1-RSS/TSS
                                        (for nonlinear regression there are
                                        several ways to define R2, and they
                                        give different results).

NOTE: once the coefficients are known, the model can be evaluated with
      LogisticCalc5().

NOTE: use LogisticFit45X() if you need more control over the fitting
      process than this function provides.

NOTE: the step is rescaled according to the scale of the parameters being
      fitted before its length is compared with EpsX, so data with very
      small or very large values can be fitted without changing EpsX.

  -- ALGLIB PROJECT --
     Copyright 14.02.2014 by Bochkanov Sergey
*************************************************************************/
public static void logisticfit5(double[] x,
    double[] y,
    int n,
    ref double a,
    ref double b,
    ref double c,
    ref double d,
    ref double g,
    lsfitreport rep)
{
    //
    // The worker receives private copies of the inputs, so the caller's
    // arrays are never handed to it directly.
    //
    double[] xcopy = (double[])x.Clone();
    double[] ycopy = (double[])y.Clone();

    //
    // Outputs start from a known (zero) state
    //
    a = b = c = d = g = 0;

    //
    // Both boundary constraints are passed as NaN, i.e. "no constraint"
    // (see the CnstrLeft/CnstrRight convention of LogisticFit5EC()).
    //
    logisticfit45x(xcopy, ycopy, n, Double.NaN, Double.NaN, false, 0.0, 0.0, 0, ref a, ref b, ref c, ref d, ref g, rep);
}
/*************************************************************************
Nonlinear least squares fitting results.

Called after return from LSFitFit().

This is a thin wrapper: it creates empty output objects and forwards the
call to lsfit.lsfitresults(), which fills them.

INPUT PARAMETERS:
    State   -   algorithm state

OUTPUT PARAMETERS:
    Info    -   completion code:
                    *  1    relative function improvement is no more than
                            EpsF.
                    *  2    relative step is no more than EpsX.
                    *  4    gradient norm is no more than EpsG
                    *  5    MaxIts steps was taken
                    *  7    stopping conditions are too stringent,
                            further improvement is impossible
    C       -   array[0..K-1], solution
    Rep     -   optimization report. Following fields are set:
                * Rep.TerminationType completion code:
                * RMSError          rms error on the (X,Y).
                * AvgError          average error on the (X,Y).
                * AvgRelError       average relative error on the non-zero Y
                * MaxError          maximum error
                                    NON-WEIGHTED ERRORS ARE CALCULATED


  -- ALGLIB --
     Copyright 17.08.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitresults(lsfitstate state, out int info, out double[] c, out lsfitreport rep)
{
    // "out" parameters must be assigned before they can be passed by "ref"
    info = 0;
    c = new double[0];
    rep = new lsfitreport();
    // forward to the computational core, unwrapping the inner objects
    lsfit.lsfitresults(state.innerobj, ref info, ref c, rep.innerobj);
    return;
}
/*************************************************************************
Retrieves results of nonlinear least squares fitting.

Must be called after LSFitFit() has returned.

INPUT PARAMETERS:
    State   -   algorithm state

OUTPUT PARAMETERS:
    Info    -   completion code:
                    * -7    gradient verification failed.
                            See LSFitSetGradientCheck() for more information.
                    *  1    relative function improvement is no more than
                            EpsF.
                    *  2    relative step is no more than EpsX.
                    *  4    gradient norm is no more than EpsG
                    *  5    MaxIts steps was taken
                    *  7    stopping conditions are too stringent,
                            further improvement is impossible
    C       -   array[0..K-1], solution. Empty array when Info<=0.
    Rep     -   optimization report. It is cleared first; Rep.VarIdx is
                always set, and on success (Info>0) the following fields
                are filled:
                * R2                non-adjusted coefficient of determination
                                    (non-weighted)
                * RMSError          rms error on the (X,Y).
                * AvgError          average error on the (X,Y).
                * AvgRelError       average relative error on the non-zero Y
                * MaxError          maximum error
                                    NON-WEIGHTED ERRORS ARE CALCULATED
                * WRMSError         weighted rms error on the (X,Y).
                * IterationsCount   iterations count stored in State

ERRORS IN PARAMETERS

The solver also estimates several kinds of errors in parameters and stores
them in the report:
* Rep.CovPar        covariance matrix for parameters, array[K,K].
* Rep.ErrPar        errors in parameters, array[K],
                    errpar = sqrt(diag(CovPar))
* Rep.ErrCurve      vector of fit errors - standard deviations of empirical
                    best-fit curve from "ideal" best-fit curve built with
                    infinite number of samples, array[N].
                    errcurve = sqrt(diag(J*CovPar*J')),
                    where J is Jacobian matrix.
* Rep.Noise         vector of per-point estimates of noise, array[N]

IMPORTANT:  errors in parameters are computed WITHOUT taking boundary/linear
            constraints into account! Constraints change the distribution
            of errors, but there is no easy way to account for them when
            the covariance matrix is calculated.

NOTE:       noise in the data is estimated as follows:
            * without user-supplied weights all points are assumed to have
              the same noise level, which is estimated from the data
            * with user-supplied weights the noise level in the I-th point
              is assumed to be inversely proportional to the I-th weight;
              the coefficient of proportionality is estimated from the data.

NOTE:       a small amount of regularization is applied when the squared
            Jacobian is inverted and the covariance matrix is calculated.
            It guarantees that the algorithm won't divide by zero during
            inversion, but skews error estimates a bit (fractional error is
            about 10^-9).

            We believe this difference is insignificant for all practical
            purposes, except when you want to compare ALGLIB results with a
            "reference" implementation up to the last significant digit.

NOTE:       the covariance matrix is estimated with correction for degrees
            of freedom (covariances are divided by N-M instead of N).

  -- ALGLIB --
     Copyright 17.08.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitresults(lsfitstate state,
    ref int info,
    ref double[] c,
    lsfitreport rep)
{
    int row = 0;
    int col = 0;
    int idx = 0;
    int nparams = 0;
    int npoints = 0;

    //
    // Outputs are reset before anything is read from State
    //
    info = 0;
    c = new double[0];
    clearreport(rep);

    //
    // Termination code and VarIdx are reported unconditionally
    //
    info = state.repterminationtype;
    rep.varidx = state.repvaridx;
    if( info<=0 )
    {
        //
        // Fitting failed: nothing else to report
        //
        return;
    }

    //
    // Solution vector
    //
    nparams = state.k;
    c = new double[nparams];
    for(idx=0; idx<nparams; idx++)
    {
        c[idx] = state.c[idx];
    }

    //
    // Scalar error metrics
    //
    rep.rmserror = state.reprmserror;
    rep.wrmserror = state.repwrmserror;
    rep.avgerror = state.repavgerror;
    rep.avgrelerror = state.repavgrelerror;
    rep.maxerror = state.repmaxerror;
    rep.iterationscount = state.repiterationscount;

    //
    // Covariance-related information: allocate, then copy element by element
    //
    npoints = state.npoints;
    rep.covpar = new double[nparams, nparams];
    rep.errpar = new double[nparams];
    rep.errcurve = new double[npoints];
    rep.noise = new double[npoints];
    rep.r2 = state.rep.r2;
    for(row=0; row<nparams; row++)
    {
        for(col=0; col<nparams; col++)
        {
            rep.covpar[row,col] = state.rep.covpar[row,col];
        }
        rep.errpar[row] = state.rep.errpar[row];
    }
    for(row=0; row<npoints; row++)
    {
        rep.errcurve[row] = state.rep.errcurve[row];
        rep.noise[row] = state.rep.noise[row];
    }
}
/*************************************************************************
Creates a copy of this report.

Scalar fields are copied by value; array fields (CovPar, ErrPar, ErrCurve,
Noise) are cloned, so the copy does not share array storage with the
original.
*************************************************************************/
public override alglib.apobject make_copy()
{
    lsfitreport copy = new lsfitreport();

    //
    // Scalar fields
    //
    copy.taskrcond = taskrcond;
    copy.iterationscount = iterationscount;
    copy.varidx = varidx;
    copy.r2 = r2;
    copy.rmserror = rmserror;
    copy.wrmserror = wrmserror;
    copy.avgerror = avgerror;
    copy.avgrelerror = avgrelerror;
    copy.maxerror = maxerror;

    //
    // Array fields
    //
    copy.covpar = (double[,])covpar.Clone();
    copy.errpar = (double[])errpar.Clone();
    copy.errcurve = (double[])errcurve.Clone();
    copy.noise = (double[])noise.Clone();
    return copy;
}
/*************************************************************************
Internal spline fitting subroutine.

Builds a linear least squares problem whose unknowns are coefficients of M
spline basis functions, solves it with LSFitLinearWC() (subject to K
constraints on spline values/derivatives) and converts the solution to a
spline interpolant.

INPUT PARAMETERS:
    St      -   spline type: 0 for cubic spline basis, 1 for Hermite spline
                basis. Any other value triggers an assertion.
    X, Y, W -   points, function values and weights; leading N elements are
                used. Arrays are copied, caller's data are not modified.
    N       -   number of points
    XC, YC  -   constraint locations and constraint values, K elements each.
                Arrays are copied, caller's data are not modified.
    DC      -   constraint types, K elements: 0 constrains spline value,
                1 constrains first derivative
    K       -   number of constraints
    M       -   number of basis functions

OUTPUT PARAMETERS:
    Info    -   completion code:
                * -1    incorrect parameters: M<4, N<1, K<0, K>=M, or some
                        DC[i] is neither 0 nor 1
                * -2    St=1 (Hermite) was requested with odd M
                * any other value is the one returned by LSFitLinearWC();
                  negative values are returned to the caller immediately,
                  S and Rep are not filled in such case
    S       -   resulting spline, transformed back to the original X/Y scale
    Rep     -   report; TaskRCond, RMSError, AvgError, MaxError and
                AvgRelError are set

  -- ALGLIB PROJECT --
     Copyright 08.09.2009 by Bochkanov Sergey
*************************************************************************/
private static void spline1dfitinternal(int st,
    double[] x,
    double[] y,
    double[] w,
    int n,
    double[] xc,
    double[] yc,
    int[] dc,
    int k,
    int m,
    ref int info,
    spline1d.spline1dinterpolant s,
    spline1dfitreport rep)
{
    double[,] fmatrix = new double[0,0];
    double[,] cmatrix = new double[0,0];
    double[] y2 = new double[0];
    double[] w2 = new double[0];
    double[] sx = new double[0];
    double[] sy = new double[0];
    double[] sd = new double[0];
    double[] tmp = new double[0];
    double[] xoriginal = new double[0];
    double[] yoriginal = new double[0];
    lsfitreport lrep = new lsfitreport();
    double v0 = 0;
    double v1 = 0;
    double v2 = 0;
    double mx = 0;
    spline1d.spline1dinterpolant s2 = new spline1d.spline1dinterpolant();
    int i = 0;
    int j = 0;
    int relcnt = 0;
    double xa = 0;
    double xb = 0;
    double sa = 0;
    double sb = 0;
    double bl = 0;
    double br = 0;
    double decay = 0;
    int i_ = 0;

    x = (double[])x.Clone();
    y = (double[])y.Clone();
    w = (double[])w.Clone();
    xc = (double[])xc.Clone();
    yc = (double[])yc.Clone();
    info = 0;

    alglib.ap.assert(st==0 || st==1, "Spline1DFit: internal error!");

    //
    // Both supported spline types need at least 4 basis functions
    //
    if( (st==0 || st==1) && m<4 )
    {
        info = -1;
        return;
    }
    if( (n<1 || k<0) || k>=m )
    {
        info = -1;
        return;
    }

    //
    // Only constraints on the value (0) or the first derivative (1)
    // are accepted
    //
    for(i=0; i<=k-1; i++)
    {
        if( dc[i]<0 || dc[i]>1 )
        {
            info = -1;
            return;
        }
    }
    if( st==1 && m%2!=0 )
    {
        //
        // Hermite fitter must have even number of basis functions
        //
        info = -2;
        return;
    }

    //
    // weight decay for correct handling of task which becomes
    // degenerate after constraints are applied
    //
    decay = 10000*math.machineepsilon;

    //
    // Scale X, Y, XC, YC
    //
    lsfitscalexy(ref x, ref y, ref w, n, ref xc, ref yc, dc, k, ref xa, ref xb, ref sa, ref sb, ref xoriginal, ref yoriginal);

    //
    // allocate space, initialize:
    // * SX     -   grid for basis functions
    // * SY     -   values of basis functions at grid points
    // * FMatrix-   values of basis functions at X[]
    // * CMatrix-   values (derivatives) of basis functions at XC[]
    //
    // Y2/W2 are allocated exactly once here and filled further below.
    //
    y2 = new double[n+m];
    w2 = new double[n+m];
    fmatrix = new double[n+m, m];
    if( k>0 )
    {
        cmatrix = new double[k, m+1];
    }
    if( st==0 )
    {
        //
        // allocate space for cubic spline
        //
        sx = new double[m-2];
        sy = new double[m-2];
        for(j=0; j<=m-2-1; j++)
        {
            sx[j] = (double)(2*j)/(double)(m-2-1)-1;
        }
    }
    if( st==1 )
    {
        //
        // allocate space for Hermite spline
        //
        sx = new double[m/2];
        sy = new double[m/2];
        sd = new double[m/2];
        for(j=0; j<=m/2-1; j++)
        {
            sx[j] = (double)(2*j)/(double)(m/2-1)-1;
        }
    }

    //
    // Prepare design and constraints matrices:
    // * fill constraints matrix
    // * fill first N rows of design matrix with values
    // * fill next M rows of design matrix with regularizing term
    // * append M zeros to Y
    // * append M elements, mean(abs(W)) each, to W
    //
    for(j=0; j<=m-1; j++)
    {
        //
        // prepare Jth basis function
        //
        if( st==0 )
        {
            //
            // cubic spline basis
            //
            for(i=0; i<=m-2-1; i++)
            {
                sy[i] = 0;
            }
            bl = 0;
            br = 0;
            if( j<m-2 )
            {
                sy[j] = 1;
            }
            if( j==m-2 )
            {
                bl = 1;
            }
            if( j==m-1 )
            {
                br = 1;
            }
            spline1d.spline1dbuildcubic(sx, sy, m-2, 1, bl, 1, br, s2);
        }
        if( st==1 )
        {
            //
            // Hermite basis
            //
            for(i=0; i<=m/2-1; i++)
            {
                sy[i] = 0;
                sd[i] = 0;
            }
            if( j%2==0 )
            {
                sy[j/2] = 1;
            }
            else
            {
                sd[j/2] = 1;
            }
            spline1d.spline1dbuildhermite(sx, sy, sd, m/2, s2);
        }

        //
        // values at X[], XC[]
        //
        for(i=0; i<=n-1; i++)
        {
            fmatrix[i,j] = spline1d.spline1dcalc(s2, x[i]);
        }
        for(i=0; i<=k-1; i++)
        {
            alglib.ap.assert(dc[i]>=0 && dc[i]<=2, "Spline1DFit: internal error!");
            spline1d.spline1ddiff(s2, xc[i], ref v0, ref v1, ref v2);
            if( dc[i]==0 )
            {
                cmatrix[i,j] = v0;
            }
            if( dc[i]==1 )
            {
                cmatrix[i,j] = v1;
            }
            if( dc[i]==2 )
            {
                cmatrix[i,j] = v2;
            }
        }
    }
    for(i=0; i<=k-1; i++)
    {
        cmatrix[i,m] = yc[i];
    }
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=m-1; j++)
        {
            if( i==j )
            {
                fmatrix[n+i,j] = decay;
            }
            else
            {
                fmatrix[n+i,j] = 0;
            }
        }
    }
    for(i_=0; i_<=n-1;i_++)
    {
        y2[i_] = y[i_];
    }
    for(i_=0; i_<=n-1;i_++)
    {
        w2[i_] = w[i_];
    }
    mx = 0;
    for(i=0; i<=n-1; i++)
    {
        mx = mx+Math.Abs(w[i]);
    }
    mx = mx/n;
    for(i=0; i<=m-1; i++)
    {
        y2[n+i] = 0;
        w2[n+i] = mx;
    }

    //
    // Solve constrained task
    //
    if( k>0 )
    {
        //
        // solve using regularization
        //
        lsfitlinearwc(y2, w2, fmatrix, cmatrix, n+m, m, k, ref info, ref tmp, lrep);
    }
    else
    {
        //
        // no constraints, no regularization needed
        //
        lsfitlinearwc(y, w, fmatrix, cmatrix, n, m, k, ref info, ref tmp, lrep);
    }
    if( info<0 )
    {
        return;
    }

    //
    // Generate spline and scale it
    //
    if( st==0 )
    {
        //
        // cubic spline basis
        //
        for(i_=0; i_<=m-2-1;i_++)
        {
            sy[i_] = tmp[i_];
        }
        spline1d.spline1dbuildcubic(sx, sy, m-2, 1, tmp[m-2], 1, tmp[m-1], s);
    }
    if( st==1 )
    {
        //
        // Hermite basis
        //
        for(i=0; i<=m/2-1; i++)
        {
            sy[i] = tmp[2*i];
            sd[i] = tmp[2*i+1];
        }
        spline1d.spline1dbuildhermite(sx, sy, sd, m/2, s);
    }
    spline1d.spline1dlintransx(s, 2/(xb-xa), -((xa+xb)/(xb-xa)));
    spline1d.spline1dlintransy(s, sb-sa, sa);

    //
    // Scale absolute errors obtained from LSFitLinearW.
    // Relative error should be calculated separately
    // (because of shifting/scaling of the task)
    //
    rep.taskrcond = lrep.taskrcond;
    rep.rmserror = lrep.rmserror*(sb-sa);
    rep.avgerror = lrep.avgerror*(sb-sa);
    rep.maxerror = lrep.maxerror*(sb-sa);
    rep.avgrelerror = 0;
    relcnt = 0;
    for(i=0; i<=n-1; i++)
    {
        if( (double)(yoriginal[i])!=(double)(0) )
        {
            rep.avgrelerror = rep.avgrelerror+Math.Abs(spline1d.spline1dcalc(s, xoriginal[i])-yoriginal[i])/Math.Abs(yoriginal[i]);
            relcnt = relcnt+1;
        }
    }
    if( relcnt!=0 )
    {
        rep.avgrelerror = rep.avgrelerror/relcnt;
    }
}
/*************************************************************************
Resets all reference-type fields of this object to a freshly allocated,
empty state: every array field becomes a zero-length array and every nested
helper object is replaced by a new instance.

NOTE(review): value-type fields are not touched here; presumably they are
              initialized elsewhere (e.g. by the constructor) - confirm.
*************************************************************************/
public override void init()
{
    // zero-length arrays/matrices
    s = new double[0];
    bndl = new double[0];
    bndu = new double[0];
    taskx = new double[0,0];
    tasky = new double[0];
    taskw = new double[0];
    x = new double[0];
    c = new double[0];
    g = new double[0];
    h = new double[0,0];
    wcur = new double[0];
    tmp = new double[0];
    tmpf = new double[0];
    tmpjac = new double[0,0];
    tmpjacw = new double[0,0];
    // fresh nested report/state objects
    invrep = new matinv.matinvreport();
    rep = new lsfitreport();
    optstate = new minlm.minlmstate();
    optrep = new minlm.minlmreport();
    rstate = new rcommstate();
}
/*************************************************************************
Internal Floater-Hormann fitting subroutine for fixed D.

Builds a linear least squares problem whose unknowns are function values at
M equidistant nodes on [-1,+1] of a Floater-Hormann interpolant with
parameter D, solves it with LSFitLinearWC() (subject to K constraints on
values/derivatives) and converts the solution to a barycentric interpolant.

INPUT PARAMETERS:
    X, Y, W -   points, function values and weights; leading N elements are
                used. Arrays are copied, caller's data are not modified.
    N       -   number of points
    XC, YC  -   constraint locations and constraint values, K elements each.
                Arrays are copied, caller's data are not modified.
    DC      -   constraint types, K elements: 0 constrains function value,
                1 constrains first derivative
    K       -   number of constraints
    M       -   number of basis functions (nodes)
    D       -   Floater-Hormann parameter, passed as is to
                BarycentricBuildFloaterHormann()

OUTPUT PARAMETERS:
    Info    -   completion code:
                * -1    incorrect parameters: N<1, M<2, K<0, K>=M, or some
                        DC[i] is neither 0 nor 1
                * any other value is the one returned by LSFitLinearWC();
                  negative values are returned to the caller immediately,
                  B and Rep are not filled in such case
    B       -   resulting interpolant, transformed back to the original
                X/Y scale
    Rep     -   report; TaskRCond, RMSError, AvgError, MaxError and
                AvgRelError are set
*************************************************************************/
private static void barycentricfitwcfixedd(double[] x,
    double[] y,
    double[] w,
    int n,
    double[] xc,
    double[] yc,
    int[] dc,
    int k,
    int m,
    int d,
    ref int info,
    ratint.barycentricinterpolant b,
    barycentricfitreport rep)
{
    double[,] fmatrix = new double[0,0];
    double[,] cmatrix = new double[0,0];
    double[] y2 = new double[0];
    double[] w2 = new double[0];
    double[] sx = new double[0];
    double[] sy = new double[0];
    double[] sbf = new double[0];
    double[] xoriginal = new double[0];
    double[] yoriginal = new double[0];
    double[] tmp = new double[0];
    lsfitreport lrep = new lsfitreport();
    double v0 = 0;
    double v1 = 0;
    double mx = 0;
    ratint.barycentricinterpolant b2 = new ratint.barycentricinterpolant();
    int i = 0;
    int j = 0;
    int relcnt = 0;
    double xa = 0;
    double xb = 0;
    double sa = 0;
    double sb = 0;
    double decay = 0;
    int i_ = 0;

    x = (double[])x.Clone();
    y = (double[])y.Clone();
    w = (double[])w.Clone();
    xc = (double[])xc.Clone();
    yc = (double[])yc.Clone();
    info = 0;

    if( ((n<1 || m<2) || k<0) || k>=m )
    {
        info = -1;
        return;
    }

    //
    // Only constraints on the value (0) or the first derivative (1)
    // are accepted
    //
    for(i=0; i<=k-1; i++)
    {
        if( dc[i]<0 || dc[i]>1 )
        {
            info = -1;
            return;
        }
    }

    //
    // weight decay for correct handling of task which becomes
    // degenerate after constraints are applied
    //
    decay = 10000*math.machineepsilon;

    //
    // Scale X, Y, XC, YC
    //
    lsfitscalexy(ref x, ref y, ref w, n, ref xc, ref yc, dc, k, ref xa, ref xb, ref sa, ref sb, ref xoriginal, ref yoriginal);

    //
    // allocate space, initialize:
    // * FMatrix-   values of basis functions at X[]
    // * CMatrix-   values (derivatives) of basis functions at XC[]
    //
    // Y2/W2 are allocated exactly once here.
    //
    y2 = new double[n+m];
    w2 = new double[n+m];
    fmatrix = new double[n+m, m];
    if( k>0 )
    {
        cmatrix = new double[k, m+1];
    }

    //
    // Prepare design and constraints matrices:
    // * fill constraints matrix
    // * fill first N rows of design matrix with values
    // * fill next M rows of design matrix with regularizing term
    // * append M zeros to Y
    // * append M elements, mean(abs(W)) each, to W
    //
    sx = new double[m];
    sy = new double[m];
    sbf = new double[m];
    for(j=0; j<=m-1; j++)
    {
        sx[j] = (double)(2*j)/(double)(m-1)-1;
    }
    for(i=0; i<=m-1; i++)
    {
        sy[i] = 1;
    }
    ratint.barycentricbuildfloaterhormann(sx, sy, m, d, b2);
    mx = 0;
    for(i=0; i<=n-1; i++)
    {
        barycentriccalcbasis(b2, x[i], ref sbf);
        for(i_=0; i_<=m-1;i_++)
        {
            fmatrix[i,i_] = sbf[i_];
        }
        y2[i] = y[i];
        w2[i] = w[i];
        mx = mx+Math.Abs(w[i])/n;
    }
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=m-1; j++)
        {
            if( i==j )
            {
                fmatrix[n+i,j] = decay;
            }
            else
            {
                fmatrix[n+i,j] = 0;
            }
        }
        y2[n+i] = 0;
        w2[n+i] = mx;
    }
    if( k>0 )
    {
        for(j=0; j<=m-1; j++)
        {
            //
            // J-th basis function: interpolant which is 1 at J-th node
            // and 0 at all other nodes
            //
            for(i=0; i<=m-1; i++)
            {
                sy[i] = 0;
            }
            sy[j] = 1;
            ratint.barycentricbuildfloaterhormann(sx, sy, m, d, b2);
            for(i=0; i<=k-1; i++)
            {
                alglib.ap.assert(dc[i]>=0 && dc[i]<=1, "BarycentricFit: internal error!");
                ratint.barycentricdiff1(b2, xc[i], ref v0, ref v1);
                if( dc[i]==0 )
                {
                    cmatrix[i,j] = v0;
                }
                if( dc[i]==1 )
                {
                    cmatrix[i,j] = v1;
                }
            }
        }
        for(i=0; i<=k-1; i++)
        {
            cmatrix[i,m] = yc[i];
        }
    }

    //
    // Solve constrained task
    //
    if( k>0 )
    {
        //
        // solve using regularization
        //
        lsfitlinearwc(y2, w2, fmatrix, cmatrix, n+m, m, k, ref info, ref tmp, lrep);
    }
    else
    {
        //
        // no constraints, no regularization needed
        //
        lsfitlinearwc(y, w, fmatrix, cmatrix, n, m, k, ref info, ref tmp, lrep);
    }
    if( info<0 )
    {
        return;
    }

    //
    // Generate interpolant and scale it
    //
    for(i_=0; i_<=m-1;i_++)
    {
        sy[i_] = tmp[i_];
    }
    ratint.barycentricbuildfloaterhormann(sx, sy, m, d, b);
    ratint.barycentriclintransx(b, 2/(xb-xa), -((xa+xb)/(xb-xa)));
    ratint.barycentriclintransy(b, sb-sa, sa);

    //
    // Scale absolute errors obtained from LSFitLinearW.
    // Relative error should be calculated separately
    // (because of shifting/scaling of the task)
    //
    rep.taskrcond = lrep.taskrcond;
    rep.rmserror = lrep.rmserror*(sb-sa);
    rep.avgerror = lrep.avgerror*(sb-sa);
    rep.maxerror = lrep.maxerror*(sb-sa);
    rep.avgrelerror = 0;
    relcnt = 0;
    for(i=0; i<=n-1; i++)
    {
        if( (double)(yoriginal[i])!=(double)(0) )
        {
            rep.avgrelerror = rep.avgrelerror+Math.Abs(ratint.barycentriccalc(b, xoriginal[i])-yoriginal[i])/Math.Abs(yoriginal[i]);
            relcnt = relcnt+1;
        }
    }
    if( relcnt!=0 )
    {
        rep.avgrelerror = rep.avgrelerror/relcnt;
    }
}
/*************************************************************************
Weighted  fitting by polynomials in barycentric form, with constraints  on
function values or first derivatives.

Small regularizing term is used when solving constrained tasks (to improve
stability).

Task is linear, so linear least squares solver is used. Complexity of this
computational scheme is O(N*M^2), mostly dominated by least squares solver

SEE ALSO:
    PolynomialFit()

INPUT PARAMETERS:
    X   -   points, array[0..N-1].
    Y   -   function values, array[0..N-1].
    W   -   weights, array[0..N-1]
            Each summand in square  sum  of  approximation deviations from
            given  values  is  multiplied  by  the square of corresponding
            weight. Fill it by 1's if you don't  want  to  solve  weighted
            task.
    N   -   number of points, N>0.
            * if given, only leading N elements of X/Y/W are used
            * if not given, automatically determined from sizes of X/Y/W
    XC  -   points where polynomial values/derivatives are constrained,
            array[0..K-1].
    YC  -   values of constraints, array[0..K-1]
    DC  -   array[0..K-1], types of constraints:
            * DC[i]=0   means that P(XC[i])=YC[i]
            * DC[i]=1   means that P'(XC[i])=YC[i]
            SEE BELOW FOR IMPORTANT INFORMATION ON CONSTRAINTS
    K   -   number of constraints, 0<=K<M.
            K=0 means no constraints (XC/YC/DC are not used in such cases)
    M   -   number of basis functions (= polynomial_degree + 1), M>=1

OUTPUT PARAMETERS:
    Info-   same format as in LSFitLinearW() subroutine:
            * Info>0    task is solved
            * Info<=0   an error occurred:
                        -4 means inconvergence of internal SVD
                        -3 means inconsistent constraints
    P   -   interpolant in barycentric form.
    Rep -   report, same format as in LSFitLinearW() subroutine.
            Following fields are set:
            * RMSError      rms error on the (X,Y).
            * AvgError      average error on the (X,Y).
            * AvgRelError   average relative error on the non-zero Y
            * MaxError      maximum error
                            NON-WEIGHTED ERRORS ARE CALCULATED

IMPORTANT:
    this subroutine doesn't calculate task's condition number for K<>0.

NOTES:
    you can convert P from barycentric form  to  the  power  or  Chebyshev
    basis with PolynomialBar2Pow() or PolynomialBar2Cheb() functions  from
    POLINT subpackage.

SETTING CONSTRAINTS - DANGERS AND OPPORTUNITIES:

Setting constraints can lead  to undesired  results,  like ill-conditioned
behavior, or inconsistency being detected. On the other hand,  it  allows
us to improve quality of the fit. Here we summarize  our  experience  with
constrained regression splines:
* even simple constraints can be inconsistent, see  Wikipedia  article  on
  this subject: http://en.wikipedia.org/wiki/Birkhoff_interpolation
* the  greater  is  M (given  fixed  constraints),  the  more chances that
  constraints will be consistent
* in the general case, consistency of constraints is NOT GUARANTEED.
* in one special case, however, we can guarantee consistency. This case
  is: M>1 and constraints on the function values (NOT DERIVATIVES)

Our final recommendation is to use constraints  WHEN  AND  ONLY  when  you
can't solve your task without them. Anything beyond  special  cases  given
above is not guaranteed and may result in inconsistency.

  -- ALGLIB PROJECT --
     Copyright 10.12.2009 by Bochkanov Sergey
*************************************************************************/
public static void polynomialfitwc(double[] x,
    double[] y,
    double[] w,
    int n,
    double[] xc,
    double[] yc,
    int[] dc,
    int k,
    int m,
    ref int info,
    ratint.barycentricinterpolant p,
    polynomialfitreport rep)
{
    double xa = 0;
    double xb = 0;
    double sa = 0;
    double sb = 0;
    double[] xoriginal = new double[0];
    double[] yoriginal = new double[0];
    double[] tmp = new double[0];
    double[] tmp2 = new double[0];
    double[] bx = new double[0];
    double[] by = new double[0];
    double[] bw = new double[0];
    int i = 0;
    int j = 0;
    double u = 0;
    double v = 0;
    double s = 0;
    int relcnt = 0;
    lsfitreport lrep = new lsfitreport();

    x = (double[])x.Clone();
    y = (double[])y.Clone();
    w = (double[])w.Clone();
    xc = (double[])xc.Clone();
    yc = (double[])yc.Clone();
    info = 0;

    alglib.ap.assert(n>0, "PolynomialFitWC: N<=0!");
    alglib.ap.assert(m>0, "PolynomialFitWC: M<=0!");
    alglib.ap.assert(k>=0, "PolynomialFitWC: K<0!");
    alglib.ap.assert(k<m, "PolynomialFitWC: K>=M!");
    alglib.ap.assert(alglib.ap.len(x)>=n, "PolynomialFitWC: Length(X)<N!");
    alglib.ap.assert(alglib.ap.len(y)>=n, "PolynomialFitWC: Length(Y)<N!");
    alglib.ap.assert(alglib.ap.len(w)>=n, "PolynomialFitWC: Length(W)<N!");
    alglib.ap.assert(alglib.ap.len(xc)>=k, "PolynomialFitWC: Length(XC)<K!");
    alglib.ap.assert(alglib.ap.len(yc)>=k, "PolynomialFitWC: Length(YC)<K!");
    alglib.ap.assert(alglib.ap.len(dc)>=k, "PolynomialFitWC: Length(DC)<K!");
    alglib.ap.assert(apserv.isfinitevector(x, n), "PolynomialFitWC: X contains infinite or NaN values!");
    alglib.ap.assert(apserv.isfinitevector(y, n), "PolynomialFitWC: Y contains infinite or NaN values!");
    // message names W (it used to blame X for non-finite weights)
    alglib.ap.assert(apserv.isfinitevector(w, n), "PolynomialFitWC: W contains infinite or NaN values!");
    alglib.ap.assert(apserv.isfinitevector(xc, k), "PolynomialFitWC: XC contains infinite or NaN values!");
    alglib.ap.assert(apserv.isfinitevector(yc, k), "PolynomialFitWC: YC contains infinite or NaN values!");
    for(i=0; i<=k-1; i++)
    {
        alglib.ap.assert(dc[i]==0 || dc[i]==1, "PolynomialFitWC: one of DC[] is not 0 or 1!");
    }

    //
    // Scale X, Y, XC, YC.
    // Solve scaled problem using internal Chebyshev fitting function.
    //
    lsfitscalexy(ref x, ref y, ref w, n, ref xc, ref yc, dc, k, ref xa, ref xb, ref sa, ref sb, ref xoriginal, ref yoriginal);
    internalchebyshevfit(x, y, w, n, xc, yc, dc, k, m, ref info, ref tmp, lrep);
    if( info<0 )
    {
        return;
    }

    //
    // Generate barycentric model and scale it
    // * BX, BY store barycentric model nodes
    // * BW stores barycentric weights (alternating sign, halved at the
    //   first and the last node)
    // * TMP2 is a work buffer for values of the basis functions at the
    //   current node, generated by three-term recurrence
    //
    // Model initialization is done in O(M^2). In principle, it can be
    // done in O(M*log(M)), but before it we solved task with O(N*M^2)
    // complexity, so it is only a small amount of total time spent.
    //
    bx = new double[m];
    by = new double[m];
    bw = new double[m];
    tmp2 = new double[m];
    s = 1;
    for(i=0; i<=m-1; i++)
    {
        //
        // I-th node; the single node of M=1 model is placed at 0
        //
        if( m!=1 )
        {
            u = Math.Cos(Math.PI*i/(m-1));
        }
        else
        {
            u = 0;
        }

        //
        // V = sum of TMP[j]*T_j(U), where T_0=1, T_1=U,
        // T_j = 2*U*T_(j-1)-T_(j-2)
        //
        v = 0;
        for(j=0; j<=m-1; j++)
        {
            if( j==0 )
            {
                tmp2[j] = 1;
            }
            else
            {
                if( j==1 )
                {
                    tmp2[j] = u;
                }
                else
                {
                    tmp2[j] = 2*u*tmp2[j-1]-tmp2[j-2];
                }
            }
            v = v+tmp[j]*tmp2[j];
        }
        bx[i] = u;
        by[i] = v;
        bw[i] = s;
        if( i==0 || i==m-1 )
        {
            bw[i] = 0.5*bw[i];
        }
        s = -s;
    }
    ratint.barycentricbuildxyw(bx, by, bw, m, p);
    ratint.barycentriclintransx(p, 2/(xb-xa), -((xa+xb)/(xb-xa)));
    ratint.barycentriclintransy(p, sb-sa, sa);

    //
    // Scale absolute errors obtained from LSFitLinearW.
    // Relative error should be calculated separately
    // (because of shifting/scaling of the task)
    //
    rep.taskrcond = lrep.taskrcond;
    rep.rmserror = lrep.rmserror*(sb-sa);
    rep.avgerror = lrep.avgerror*(sb-sa);
    rep.maxerror = lrep.maxerror*(sb-sa);
    rep.avgrelerror = 0;
    relcnt = 0;
    for(i=0; i<=n-1; i++)
    {
        if( (double)(yoriginal[i])!=(double)(0) )
        {
            rep.avgrelerror = rep.avgrelerror+Math.Abs(ratint.barycentriccalc(p, xoriginal[i])-yoriginal[i])/Math.Abs(yoriginal[i]);
            relcnt = relcnt+1;
        }
    }
    if( relcnt!=0 )
    {
        rep.avgrelerror = rep.avgrelerror/relcnt;
    }
}
/*************************************************************************
This internal function estimates covariance matrix and other error-related
information for linear/nonlinear least squares model.

It has a bit awkward interface, but it can be used for both linear and
nonlinear problems.

INPUT PARAMETERS:
    F1  -   array[0..N-1,0..K-1]:
            * for linear problems - matrix of function values
            * for nonlinear problems - Jacobian matrix
    F0  -   array[0..N-1]:
            * for linear problems - must be filled with zeros
            * for nonlinear problems - must store values of function being
              fitted
    Y   -   array[0..N-1]:
            * for linear and nonlinear problems - must store target values
    W   -   weights, array[0..N-1]:
            * for linear and nonlinear problems - weights
            * points with zero weight are excluded from R2 and from the
              noise estimate
    X   -   array[0..K-1]:
            * for linear and nonlinear problems - current solution
    S   -   array[0..K-1]:
            * its components should be strictly positive
            * squared inverse of this diagonal matrix is used as damping
              factor for covariance matrix (linear and nonlinear problems)
            * for nonlinear problems, when scale of the variables is usually
              explicitly given by user, you may use scale vector for this
              parameter
            * for linear problems you may set this parameter to
              S=sqrt(1/diag(F'*F))
            * this parameter is automatically rescaled by this function,
              only relative magnitudes of its components (with respect to
              each other) matter.
            * the function works on a private copy, the caller's array is
              left unchanged
    N   -   number of points, N>0.
    K   -   number of dimensions
    Rep -   structure which is used to store results
    Z   -   additional matrix which, depending on ZKind, may contain some
            information used to accelerate calculations - or just can be
            temporary buffer:
            * for ZKind=0       Z contains no information, just temporary
                                buffer which can be resized and used as needed
            * for ZKind=1       Z contains triangular matrix from QR
                                decomposition of W*F1. This matrix can be used
                                to speedup calculation of covariance matrix.
                                It should not be changed by algorithm.
    ZKind-  contents of Z (only checked when the noise estimate is non-zero)

OUTPUT PARAMETERS:

* Rep.CovPar        covariance matrix for parameters, array[K,K].
* Rep.ErrPar        errors in parameters, array[K],
                    errpar = sqrt(diag(CovPar))
* Rep.ErrCurve      vector of fit errors - standard deviations of empirical
                    best-fit curve from "ideal" best-fit curve built  with
                    infinite number of samples, array[N].
                    errcurve = sqrt(diag(J*CovPar*J')),
                    where J is Jacobian matrix.
* Rep.Noise         vector of per-point estimates of noise, array[N]
* Rep.R2            coefficient of determination (non-weighted)

Other fields of Rep are not changed.

IMPORTANT:  errors  in  parameters  are  calculated  without  taking  into
            account boundary/linear constraints! Presence  of  constraints
            changes distribution of errors, but there is no  easy  way  to
            account for constraints when you calculate covariance matrix.

NOTE:       noise in the data is estimated as follows:
            * for fitting without user-supplied  weights  all  points  are
              assumed to have same level of noise, which is estimated from
              the data
            * for fitting with user-supplied weights we assume that  noise
              level in I-th point is inversely proportional to Ith weight.
              Coefficient of proportionality is estimated from the data.

NOTE:       we apply small amount of regularization when we invert squared
            Jacobian and calculate covariance matrix. It  guarantees  that
            algorithm won't divide by zero  during  inversion,  but  skews
            error estimates a bit (fractional error is about 10^-9).

            However, we believe that this difference is insignificant  for
            all practical purposes except for the situation when you  want
            to compare ALGLIB results with "reference"  implementation  up
            to the last significant digit.

  -- ALGLIB PROJECT --
     Copyright 10.12.2009 by Bochkanov Sergey
*************************************************************************/
private static void estimateerrors(double[,] f1,
    double[] f0,
    double[] y,
    double[] w,
    double[] x,
    double[] s,
    int n,
    int k,
    lsfitreport rep,
    ref double[,] z,
    int zkind)
{
    int i = 0;
    int j = 0;
    int j1 = 0;
    double v = 0;
    double noisec = 0;
    int info = 0;
    matinv.matinvreport invrep = new matinv.matinvreport();
    int nzcnt = 0;
    double avg = 0;
    double rss = 0;
    double tss = 0;
    double sz = 0;
    double ss = 0;
    int i_ = 0;

    // S is overwritten below with the rescaled damping terms, so work on a copy.
    s = (double[])s.Clone();

    
    //
    // Compute NZCnt - count of non-zero weights
    //
    nzcnt = 0;
    for(i=0; i<=n-1; i++)
    {
        if( (double)(w[i])!=(double)(0) )
        {
            nzcnt = nzcnt+1;
        }
    }
    
    //
    // Compute R2
    // * only points with non-zero weight participate
    // * the value is clipped from below at 0
    // * R2=1 when all participating Y are equal to their mean (TSS=0)
    // * R2=0 when there are no points with non-zero weight
    //
    if( nzcnt>0 )
    {
        avg = 0.0;
        for(i=0; i<=n-1; i++)
        {
            if( (double)(w[i])!=(double)(0) )
            {
                avg = avg+y[i];
            }
        }
        avg = avg/nzcnt;
        rss = 0.0;
        tss = 0.0;
        for(i=0; i<=n-1; i++)
        {
            if( (double)(w[i])!=(double)(0) )
            {
                // Model value at point I: F1[I,*]*X + F0[I]
                v = 0.0;
                for(i_=0; i_<=k-1;i_++)
                {
                    v += f1[i,i_]*x[i_];
                }
                v = v+f0[i];
                rss = rss+math.sqr(v-y[i]);
                tss = tss+math.sqr(y[i]-avg);
            }
        }
        if( (double)(tss)!=(double)(0) )
        {
            rep.r2 = Math.Max(1.0-rss/tss, 0.0);
        }
        else
        {
            rep.r2 = 1.0;
        }
    }
    else
    {
        rep.r2 = 0;
    }
    
    //
    // Compute estimate of proportionality between noise in the data and weights:
    //     NoiseC = sqrt(sum((residual[I]*W[I])^2)/(NZCnt-K))
    // Noise level (standard deviation) at each point is equal to NoiseC/W[I].
    //
    // When there are not more weighted points than parameters (NZCnt<=K),
    // no estimate is possible and NoiseC is set to zero.
    //
    if( nzcnt>k )
    {
        noisec = 0.0;
        for(i=0; i<=n-1; i++)
        {
            if( (double)(w[i])!=(double)(0) )
            {
                v = 0.0;
                for(i_=0; i_<=k-1;i_++)
                {
                    v += f1[i,i_]*x[i_];
                }
                v = v+f0[i];
                noisec = noisec+math.sqr((v-y[i])*w[i]);
            }
        }
        noisec = Math.Sqrt(noisec/(nzcnt-k));
    }
    else
    {
        noisec = 0.0;
    }
    
    //
    // Two branches on noise level:
    // * NoiseC>0   normal situation
    // * NoiseC=0   degenerate case CovPar is filled by zeros
    //
    apserv.rmatrixsetlengthatleast(ref rep.covpar, k, k);
    if( (double)(noisec)>(double)(0) )
    {
        
        //
        // Normal situation: non-zero noise level
        //
        alglib.ap.assert(zkind==0 || zkind==1, "LSFit: internal error in EstimateErrors() function");
        if( zkind==0 )
        {
            
            //
            // Z contains no additional information which can be used to speed up
            // calculations. We have to calculate covariance matrix on our own:
            // * Compute scaled Jacobian N*J, where N[i,i]=W[I]/NoiseC, store in Z
            // * Compute Z'*Z, store in CovPar
            // * Apply moderate regularization to CovPar and compute matrix inverse.
            //   In case inverse failed, increase regularization parameter and try
            //   again.
            //
            apserv.rmatrixsetlengthatleast(ref z, n, k);
            for(i=0; i<=n-1; i++)
            {
                v = w[i]/noisec;
                for(i_=0; i_<=k-1;i_++)
                {
                    z[i,i_] = v*f1[i,i_];
                }
            }
            
            //
            // Convert S to automatically scaled damped matrix:
            // * calculate SZ - sum of diagonal elements of Z'*Z
            // * calculate SS - sum of diagonal elements of S^(-2)
            // * overwrite S by (SZ/SS)*S^(-2)
            // * now S has approximately same magnitude as diagonal of Z'*Z
            //
            sz = 0;
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=k-1; j++)
                {
                    sz = sz+z[i,j]*z[i,j];
                }
            }
            if( (double)(sz)==(double)(0) )
            {
                sz = 1;
            }
            ss = 0;
            for(j=0; j<=k-1; j++)
            {
                ss = ss+1/math.sqr(s[j]);
            }
            for(j=0; j<=k-1; j++)
            {
                s[j] = sz/ss/math.sqr(s[j]);
            }
            
            //
            // Calculate damped inverse inv(Z'*Z+S).
            // We increase damping factor V until Z'*Z become well-conditioned.
            // Z'*Z is recomputed on every pass because the inversion works
            // in place on CovPar. The loop repeats, with V multiplied by 10,
            // for as long as the inversion reports Info<=0.
            //
            v = 1.0E3*math.machineepsilon;
            do
            {
                ablas.rmatrixsyrk(k, n, 1.0, z, 0, 0, 2, 0.0, rep.covpar, 0, 0, true);
                for(i=0; i<=k-1; i++)
                {
                    rep.covpar[i,i] = rep.covpar[i,i]+v*s[i];
                }
                matinv.spdmatrixinverse(ref rep.covpar, k, true, ref info, invrep);
                v = 10*v;
            }
            while( info<=0 );

            // Mirror the upper triangle into the lower one to get a full
            // symmetric matrix.
            for(i=0; i<=k-1; i++)
            {
                for(j=i+1; j<=k-1; j++)
                {
                    rep.covpar[j,i] = rep.covpar[i,j];
                }
            }
        }
        if( zkind==1 )
        {
            
            //
            // We can reuse additional information:
            // * Z contains R matrix from QR decomposition of W*F1 
            // * After multiplication by 1/NoiseC we get Z_mod = N*F1, where diag(N)=w[i]/NoiseC
            // * Such triangular Z_mod is a Cholesky factor from decomposition of J'*N'*N*J.
            //   Thus, we can calculate covariance matrix as inverse of the matrix given by
            //   its Cholesky decomposition. It allow us to avoid time-consuming calculation
            //   of J'*N'*N*J in CovPar - complexity is reduced from O(N*K^2) to O(K^3), which
            //   is quite good because K is usually orders of magnitude smaller than N.
            //
            // First, convert S to automatically scaled damped matrix:
            // * calculate SZ - sum of magnitudes of diagonal elements of Z/NoiseC
            // * calculate SS - sum of diagonal elements of S^(-1)
            // * overwrite S by (SZ/SS)*S^(-1)
            // * now S has approximately same magnitude as diagonal of Z/NoiseC
            //
            sz = 0;
            for(j=0; j<=k-1; j++)
            {
                sz = sz+Math.Abs(z[j,j]/noisec);
            }
            if( (double)(sz)==(double)(0) )
            {
                sz = 1;
            }
            ss = 0;
            for(j=0; j<=k-1; j++)
            {
                ss = ss+1/s[j];
            }
            for(j=0; j<=k-1; j++)
            {
                s[j] = sz/ss/s[j];
            }
            
            //
            // Calculate damped inverse of inv((Z+v*S)'*(Z+v*S))
            // We increase damping factor V until matrix become well-conditioned.
            // Only the upper triangle of Z is read; Z itself is not modified,
            // the scaled factor is rebuilt in CovPar on every pass.
            //
            v = 1.0E3*math.machineepsilon;
            do
            {
                for(i=0; i<=k-1; i++)
                {
                    for(j=i; j<=k-1; j++)
                    {
                        rep.covpar[i,j] = z[i,j]/noisec;
                    }
                    rep.covpar[i,i] = rep.covpar[i,i]+v*s[i];
                }
                matinv.spdmatrixcholeskyinverse(ref rep.covpar, k, true, ref info, invrep);
                v = 10*v;
            }
            while( info<=0 );

            // Mirror the upper triangle into the lower one.
            for(i=0; i<=k-1; i++)
            {
                for(j=i+1; j<=k-1; j++)
                {
                    rep.covpar[j,i] = rep.covpar[i,j];
                }
            }
        }
    }
    else
    {
        
        //
        // Degenerate situation: zero noise level, covariance matrix is zero.
        //
        for(i=0; i<=k-1; i++)
        {
            for(j=0; j<=k-1; j++)
            {
                rep.covpar[j,i] = 0;
            }
        }
    }
    
    //
    // Estimate errors in parameters, curve and per-point noise
    //
    apserv.rvectorsetlengthatleast(ref rep.errpar, k);
    apserv.rvectorsetlengthatleast(ref rep.errcurve, n);
    apserv.rvectorsetlengthatleast(ref rep.noise, n);
    for(i=0; i<=k-1; i++)
    {
        rep.errpar[i] = Math.Sqrt(rep.covpar[i,i]);
    }
    for(i=0; i<=n-1; i++)
    {
        
        //
        // ErrCurve[I] is sqrt(P[i,i]) where P=J*CovPar*J'
        //
        v = 0.0;
        for(j=0; j<=k-1; j++)
        {
            for(j1=0; j1<=k-1; j1++)
            {
                v = v+f1[i,j]*rep.covpar[j,j1]*f1[i,j1];
            }
        }
        rep.errcurve[i] = Math.Sqrt(v);
        
        //
        // Noise[i] is filled using weights and current estimate of noise level;
        // points with zero weight get zero noise instead of a division by zero.
        //
        if( (double)(w[i])!=(double)(0) )
        {
            rep.noise[i] = noisec/w[i];
        }
        else
        {
            rep.noise[i] = 0;
        }
    }
}
/*************************************************************************
Weighted linear least squares fitting.

This entry point validates its arguments and then hands the problem over
to LSFitLinearInternal(), which does the actual work. According to the
original notes, QR decomposition is used to reduce the task to MxM, after
which a triangular or an SVD-based solver is chosen depending on the
condition number of the system.

IMPORTANT: if you want to perform  polynomial  fitting,  it  may  be  more
           convenient to use PolynomialFit() function. This function gives
           best  results  on  polynomial  problems  and  solves  numerical
           stability  issues  which  arise  when   you   fit   high-degree
           polynomials to your data.

INPUT PARAMETERS:
    Y       -   array[0..N-1], function values in N points. Must contain
                at least N finite elements.
    W       -   array[0..N-1], weights corresponding to function values.
                Each summand in the square sum of approximation deviations
                is multiplied by the square of the corresponding weight.
                Must contain at least N finite elements.
    FMatrix -   table of basis function values, array[0..N-1, 0..M-1].
                FMatrix[I,J] is the value of the J-th basis function in
                the I-th point. Must be at least NxM and finite.
    N       -   number of points used, N>=1.
    M       -   number of basis functions, M>=1.

    All of the above requirements are enforced with alglib.ap.assert
    before the solver is called; in particular, incorrect N/M trip an
    assertion rather than being reported through Info.

OUTPUT PARAMETERS:
    Info    -   reset to 0 on entry, then set by the internal solver.
                The original notes list:
                * -4    internal SVD decomposition subroutine failed (very
                        rare and for degenerate systems only)
                *  1    task is solved
    C       -   reset to an empty array on entry, then filled by the
                internal solver with decomposition coefficients,
                array[0..M-1]
    Rep     -   fitting report filled by the internal solver. The original
                notes list the following fields:
                * Rep.TaskRCond     reciprocal of condition number
                * R2                non-adjusted coefficient of determination
                                    (non-weighted)
                * RMSError          rms error on the (X,Y).
                * AvgError          average error on the (X,Y).
                * AvgRelError       average relative error on the non-zero Y
                * MaxError          maximum error
                                    NON-WEIGHTED ERRORS ARE CALCULATED
                * CovPar, ErrPar, ErrCurve, Noise - error estimates for the
                  parameters, for the curve and for the data (the original
                  notes size CovPar/ErrPar by "K"; presumably this is the
                  number of fitted coefficients, M - confirm against
                  LSFitLinearInternal)

  -- ALGLIB --
     Copyright 17.08.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitlinearw(double[] y,
    double[] w,
    double[,] fmatrix,
    int n,
    int m,
    ref int info,
    ref double[] c,
    lsfitreport rep)
{
    // Give the outputs neutral values before any check can fire.
    info = 0;
    c = new double[0];

    // Problem dimensions.
    alglib.ap.assert(n>=1, "LSFitLinearW: N<1!");
    alglib.ap.assert(m>=1, "LSFitLinearW: M<1!");

    // Function values.
    alglib.ap.assert(alglib.ap.len(y)>=n, "LSFitLinearW: length(Y)<N!");
    alglib.ap.assert(apserv.isfinitevector(y, n), "LSFitLinearW: Y contains infinite or NaN values!");

    // Weights.
    alglib.ap.assert(alglib.ap.len(w)>=n, "LSFitLinearW: length(W)<N!");
    alglib.ap.assert(apserv.isfinitevector(w, n), "LSFitLinearW: W contains infinite or NaN values!");

    // Basis function table.
    alglib.ap.assert(alglib.ap.rows(fmatrix)>=n, "LSFitLinearW: rows(FMatrix)<N!");
    alglib.ap.assert(alglib.ap.cols(fmatrix)>=m, "LSFitLinearW: cols(FMatrix)<M!");
    alglib.ap.assert(apserv.apservisfinitematrix(fmatrix, n, m), "LSFitLinearW: FMatrix contains infinite or NaN values!");

    lsfitlinearinternal(y, w, fmatrix, n, m, ref info, ref c, rep);
}
/*************************************************************************
Weighted linear least squares fitting (overload taking all array arguments
and the report by reference).

This overload performs no checks of its own: every argument is forwarded
unchanged to LSFitLinearInternal(), which solves the task and fills the
outputs. The description below is the one carried by the original notes.

INPUT PARAMETERS:
    Y       -   array[0..N-1], function values in N points.
    W       -   array[0..N-1], weights corresponding to function values.
                Each summand in the square sum of approximation deviations
                is multiplied by the square of the corresponding weight.
    FMatrix -   table of basis function values, array[0..N-1, 0..M-1].
                FMatrix[I,J] is the value of the J-th basis function in
                the I-th point.
    N       -   number of points used, N>=1.
    M       -   number of basis functions, M>=1.

OUTPUT PARAMETERS:
    Info    -   error code:
                * -4    internal SVD decomposition subroutine failed (very
                        rare and for degenerate systems only)
                * -1    incorrect N/M were specified
                *  1    task is solved
    C       -   decomposition coefficients, array[0..M-1]
    Rep     -   fitting report. Following fields are set:
                * Rep.TaskRCond     reciprocal of condition number
                * RMSError          rms error on the (X,Y).
                * AvgError          average error on the (X,Y).
                * AvgRelError       average relative error on the non-zero Y
                * MaxError          maximum error
                                    NON-WEIGHTED ERRORS ARE CALCULATED

SEE ALSO
    LSFitLinear
    LSFitLinearC
    LSFitLinearWC

  -- ALGLIB --
     Copyright 17.08.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitlinearw(ref double[] y,
    ref double[] w,
    ref double[,] fmatrix,
    int n,
    int m,
    ref int info,
    ref double[] c,
    ref lsfitreport rep)
{
    // Pure pass-through to the shared solver.
    lsfitlinearinternal(
        ref y,
        ref w,
        ref fmatrix,
        n,
        m,
        ref info,
        ref c,
        ref rep);
}
/*************************************************************************
Weighted constrained linear least squares fitting.

Searches for the coefficients C which minimize the weighted residual of
FMatrix*C=Y subject to K linear equality constraints. The constrained task
is reduced to an unconstrained one in M-K unknowns, which is then solved
by LSFitLinearInternal().

IMPORTANT: if you want to perform  polynomial  fitting,  it  may  be  more
           convenient to use PolynomialFit() function. This function gives
           best  results  on  polynomial  problems  and  solves  numerical
           stability  issues  which  arise  when   you   fit   high-degree
           polynomials to your data.

INPUT PARAMETERS:
    Y       -   array[0..N-1], function values in N points. Copied on
                entry; the caller's array is not modified.
    W       -   array[0..N-1], weights corresponding to function values.
                Each summand in the square sum of approximation deviations
                is multiplied by the square of the corresponding weight.
    FMatrix -   table of basis function values, array[0..N-1, 0..M-1].
                FMatrix[I,J] is the value of the J-th basis function in
                the I-th point.
    CMatrix -   table of constraints, array[0..K-1,0..M]. Copied on entry.
                I-th row of CMatrix corresponds to I-th linear constraint:
                CMatrix[I,0]*C[0] + ... + CMatrix[I,M-1]*C[M-1] = CMatrix[I,M]
    N       -   number of points used, N>=1.
    M       -   number of basis functions, M>=1.
    K       -   number of constraints, 0 <= K < M.
                K=0 corresponds to absence of constraints.

    Sizes and finiteness of Y, W, FMatrix and CMatrix, as well as N>=1,
    M>=1 and K>=0, are enforced with alglib.ap.assert.

OUTPUT PARAMETERS:
    Info    -   error code:
                * -3    returned by this function when K>=M, or when the
                        triangular factor of the constraint matrix has
                        reciprocal condition number below
                        1000*MachineEpsilon (degenerate constraints)
                * other values are produced by LSFitLinearInternal(); the
                  original notes list -4 (internal SVD decomposition
                  failed) and 1 (task is solved)
    C       -   decomposition coefficients, array[0..M-1]. Left as an
                empty array when Info<=0 on the constrained path.
    Rep     -   fitting report filled by LSFitLinearInternal(). The
                original notes list R2, RMSError, AvgError, AvgRelError,
                MaxError (NON-WEIGHTED ERRORS ARE CALCULATED) and the
                error estimates CovPar, ErrPar, ErrCurve, Noise.

IMPORTANT:
    this subroutine doesn't calculate task's condition number for K<>0:
    Rep.TaskRCond is set to -1 on the constrained path.

IMPORTANT:  according to the original notes, errors in parameters  are
            calculated without taking into account  boundary/linear
            constraints. Presence of constraints  changes  distribution
            of errors, but there is no easy way to account for  them
            when the covariance matrix is calculated.

  -- ALGLIB --
     Copyright 07.09.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitlinearwc(double[] y,
    double[] w,
    double[,] fmatrix,
    double[,] cmatrix,
    int n,
    int m,
    int k,
    ref int info,
    ref double[] c,
    lsfitreport rep)
{
    int row = 0;
    int col = 0;
    double acc = 0;
    double[] tau = new double[0];
    double[,] q = new double[0,0];
    double[,] f2 = new double[0,0];
    double[] work = new double[0];
    double[] c0 = new double[0];

    // Both arrays are overwritten below, so detach them from the caller.
    y = (double[])y.Clone();
    cmatrix = (double[,])cmatrix.Clone();
    info = 0;
    c = new double[0];

    alglib.ap.assert(n>=1, "LSFitLinearWC: N<1!");
    alglib.ap.assert(m>=1, "LSFitLinearWC: M<1!");
    alglib.ap.assert(k>=0, "LSFitLinearWC: K<0!");
    alglib.ap.assert(alglib.ap.len(y)>=n, "LSFitLinearWC: length(Y)<N!");
    alglib.ap.assert(apserv.isfinitevector(y, n), "LSFitLinearWC: Y contains infinite or NaN values!");
    alglib.ap.assert(alglib.ap.len(w)>=n, "LSFitLinearWC: length(W)<N!");
    alglib.ap.assert(apserv.isfinitevector(w, n), "LSFitLinearWC: W contains infinite or NaN values!");
    alglib.ap.assert(alglib.ap.rows(fmatrix)>=n, "LSFitLinearWC: rows(FMatrix)<N!");
    alglib.ap.assert(alglib.ap.cols(fmatrix)>=m, "LSFitLinearWC: cols(FMatrix)<M!");
    alglib.ap.assert(apserv.apservisfinitematrix(fmatrix, n, m), "LSFitLinearWC: FMatrix contains infinite or NaN values!");
    alglib.ap.assert(alglib.ap.rows(cmatrix)>=k, "LSFitLinearWC: rows(CMatrix)<K!");
    alglib.ap.assert(alglib.ap.cols(cmatrix)>=m+1 || k==0, "LSFitLinearWC: cols(CMatrix)<M+1!");
    alglib.ap.assert(apserv.apservisfinitematrix(cmatrix, k, m+1), "LSFitLinearWC: CMatrix contains infinite or NaN values!");

    // Too many constraints: nothing is left to fit.
    if( k>=m )
    {
        info = -3;
        return;
    }

    // No constraints: this is the plain weighted problem.
    if( k==0 )
    {
        lsfitlinearinternal(y, w, fmatrix, n, m, ref info, ref c, rep);
        return;
    }

    //
    // Step 1. General form solution of the constraints system:
    // * factorize C = L*Q
    // * unpack Q
    // * fill upper part of C with zeros (for RCond)
    //
    // We get C = C0 + Q2'*y where Q2 is lower M-K rows of Q.
    //
    ortfac.rmatrixlq(ref cmatrix, k, m, ref tau);
    ortfac.rmatrixlqunpackq(cmatrix, k, m, tau, m, ref q);
    for(row=0; row<k; row++)
    {
        for(col=row+1; col<m; col++)
        {
            cmatrix[row,col] = 0.0;
        }
    }
    if( (double)(rcond.rmatrixlurcondinf(cmatrix, k))<(double)(1000*math.machineepsilon) )
    {
        info = -3;
        return;
    }

    // Forward substitution with the lower triangular factor; the right
    // parts of the constraints live in column M of CMatrix.
    work = new double[k];
    for(row=0; row<k; row++)
    {
        acc = 0.0;
        for(col=0; col<row; col++)
        {
            acc += cmatrix[row,col]*work[col];
        }
        work[row] = (cmatrix[row,m]-acc)/cmatrix[row,row];
    }

    // C0 = (first K rows of Q)' * work. A freshly allocated array already
    // holds zeros, so it can be accumulated into directly.
    c0 = new double[m];
    for(row=0; row<k; row++)
    {
        acc = work[row];
        for(col=0; col<m; col++)
        {
            c0[col] = c0[col] + acc*q[row,col];
        }
    }

    //
    // Step 2. Prepare modified matrix F2 = F*Q2', move the contribution
    // of C0 into the right part (Y := Y - F*C0) and solve modified task.
    //
    work = new double[Math.Max(n, m)+1];
    f2 = new double[n, m-k];
    blas.matrixvectormultiply(fmatrix, 0, n-1, 0, m-1, false, c0, 0, m-1, -1.0, ref y, 0, n-1, 1.0);
    blas.matrixmatrixmultiply(fmatrix, 0, n-1, 0, m-1, false, q, k, m-1, 0, m-1, true, 1.0, ref f2, 0, n-1, 0, m-k-1, 0.0, ref work);
    lsfitlinearinternal(y, w, f2, n, m-k, ref info, ref work, rep);

    // Condition number of the reduced task is not reported.
    rep.taskrcond = -1;
    if( info<=0 )
    {
        return;
    }

    //
    // Step 3. Convert back to original answer: C = C0 + Q2'*Y0
    //
    c = (double[])c0.Clone();
    blas.matrixvectormultiply(q, k, m-1, 0, m-1, true, work, 0, m-k-1, 1.0, ref c, 0, m-1, 1.0);
}
/*************************************************************************
Weighted constrained linear least squares fitting (overload passing W,
FMatrix and the report by reference).

Searches for the coefficients C which minimize the weighted residual of
FMatrix*C=Y subject to K linear equality constraints. The constrained task
is reduced to an unconstrained one in M-K unknowns, which is then solved
by LSFitLinearInternal().

INPUT PARAMETERS:
    Y       -   array[0..N-1], function values in N points. Copied on
                entry; the caller's array is not modified.
    W       -   array[0..N-1], weights corresponding to function values.
                Each summand in the square sum of approximation deviations
                is multiplied by the square of the corresponding weight.
    FMatrix -   table of basis function values, array[0..N-1, 0..M-1].
                FMatrix[I,J] is the value of the J-th basis function in
                the I-th point.
    CMatrix -   table of constraints, array[0..K-1,0..M]. Copied on entry.
                I-th row of CMatrix corresponds to I-th linear constraint:
                CMatrix[I,0]*C[0] + ... + CMatrix[I,M-1]*C[M-1] = CMatrix[I,M]
    N       -   number of points used, N>=1.
    M       -   number of basis functions, M>=1.
    K       -   number of constraints, 0 <= K < M.
                K=0 corresponds to absence of constraints.

OUTPUT PARAMETERS:
    Info    -   error code:
                * -3    returned by this function when K>=M, or when the
                        triangular factor of the constraint matrix has
                        reciprocal condition number below
                        1000*MachineEpsilon (degenerate constraints)
                * -1    returned by this function when N<1, M<1 or K<0
                * other values are produced by LSFitLinearInternal(); the
                  original notes list -4 (internal SVD decomposition
                  failed) and 1 (task is solved)
                Info is not reset on entry.
    C       -   decomposition coefficients, array[0..M-1]. Not touched
                when this function returns with an error code.
    Rep     -   fitting report filled by LSFitLinearInternal(). The
                original notes list the following fields:
                * RMSError          rms error on the (X,Y).
                * AvgError          average error on the (X,Y).
                * AvgRelError       average relative error on the non-zero Y
                * MaxError          maximum error
                                    NON-WEIGHTED ERRORS ARE CALCULATED

IMPORTANT:
    this subroutine doesn't calculate task's condition number for K<>0:
    Rep.TaskRCond is set to -1 on the constrained path.

SEE ALSO
    LSFitLinear
    LSFitLinearC
    LSFitLinearWC

  -- ALGLIB --
     Copyright 07.09.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitlinearwc(double[] y,
    ref double[] w,
    ref double[,] fmatrix,
    double[,] cmatrix,
    int n,
    int m,
    int k,
    ref int info,
    ref double[] c,
    ref lsfitreport rep)
{
    int row = 0;
    int col = 0;
    double acc = 0;
    double[] tau = new double[0];
    double[,] q = new double[0,0];
    double[,] f2 = new double[0,0];
    double[] work = new double[0];
    double[] c0 = new double[0];

    // Both arrays are overwritten below, so detach them from the caller.
    y = (double[])y.Clone();
    cmatrix = (double[,])cmatrix.Clone();

    // Reject impossible sizes.
    if( n<1 || m<1 || k<0 )
    {
        info = -1;
        return;
    }

    // Too many constraints: nothing is left to fit.
    if( k>=m )
    {
        info = -3;
        return;
    }

    // No constraints: this is the plain weighted problem.
    if( k==0 )
    {
        lsfitlinearinternal(ref y, ref w, ref fmatrix, n, m, ref info, ref c, ref rep);
        return;
    }

    //
    // Step 1. General form solution of the constraints system:
    // * factorize C = L*Q
    // * unpack Q
    // * fill upper part of C with zeros (for RCond)
    //
    // We get C = C0 + Q2'*y where Q2 is lower M-K rows of Q.
    //
    ortfac.rmatrixlq(ref cmatrix, k, m, ref tau);
    ortfac.rmatrixlqunpackq(ref cmatrix, k, m, ref tau, m, ref q);
    for(row=0; row<k; row++)
    {
        for(col=row+1; col<m; col++)
        {
            cmatrix[row,col] = 0.0;
        }
    }
    if( (double)(rcond.rmatrixlurcondinf(ref cmatrix, k))<(double)(1000*AP.Math.MachineEpsilon) )
    {
        info = -3;
        return;
    }

    // Forward substitution with the lower triangular factor; the right
    // parts of the constraints live in column M of CMatrix.
    work = new double[k];
    for(row=0; row<k; row++)
    {
        acc = 0.0;
        for(col=0; col<row; col++)
        {
            acc += cmatrix[row,col]*work[col];
        }
        work[row] = (cmatrix[row,m]-acc)/cmatrix[row,row];
    }

    // C0 = (first K rows of Q)' * work. A freshly allocated array already
    // holds zeros, so it can be accumulated into directly.
    c0 = new double[m];
    for(row=0; row<k; row++)
    {
        acc = work[row];
        for(col=0; col<m; col++)
        {
            c0[col] = c0[col] + acc*q[row,col];
        }
    }

    //
    // Step 2. Prepare modified matrix F2 = F*Q2', move the contribution
    // of C0 into the right part (Y := Y - F*C0) and solve modified task.
    //
    work = new double[Math.Max(n, m)+1];
    f2 = new double[n, m-k];
    blas.matrixvectormultiply(ref fmatrix, 0, n-1, 0, m-1, false, ref c0, 0, m-1, -1.0, ref y, 0, n-1, 1.0);
    blas.matrixmatrixmultiply(ref fmatrix, 0, n-1, 0, m-1, false, ref q, k, m-1, 0, m-1, true, 1.0, ref f2, 0, n-1, 0, m-k-1, 0.0, ref work);
    lsfitlinearinternal(ref y, ref w, ref f2, n, m-k, ref info, ref work, ref rep);

    // Condition number of the reduced task is not reported.
    rep.taskrcond = -1;
    if( info<=0 )
    {
        return;
    }

    //
    // Step 3. Convert back to original answer: C = C0 + Q2'*Y0
    //
    c = (double[])c0.Clone();
    blas.matrixvectormultiply(ref q, k, m-1, 0, m-1, true, ref work, 0, m-k-1, 1.0, ref c, 0, m-1, 1.0);
}
/*************************************************************************
Linear least squares fitting (no user-supplied weights).

The arguments are validated, a vector of unit weights is built, and the
problem is handed over to LSFitLinearInternal(). According to the original
notes, QR decomposition is used to reduce the task to MxM, after which a
triangular or an SVD-based solver is chosen depending on the condition
number of the system.

IMPORTANT: if you want to perform  polynomial  fitting,  it  may  be  more
           convenient to use PolynomialFit() function. This function gives
           best  results  on  polynomial  problems  and  solves  numerical
           stability  issues  which  arise  when   you   fit   high-degree
           polynomials to your data.

INPUT PARAMETERS:
    Y       -   array[0..N-1], function values in N points. Must contain
                at least N finite elements.
    FMatrix -   table of basis function values, array[0..N-1, 0..M-1].
                FMatrix[I,J] is the value of the J-th basis function in
                the I-th point. Must be at least NxM and finite.
    N       -   number of points used, N>=1.
    M       -   number of basis functions, M>=1.

    All of the above requirements are enforced with alglib.ap.assert.

OUTPUT PARAMETERS:
    Info    -   reset to 0 on entry, then set by the internal solver.
                The original notes list:
                * -4    internal SVD decomposition subroutine failed (very
                        rare and for degenerate systems only)
                *  1    task is solved
    C       -   reset to an empty array on entry, then filled by the
                internal solver with decomposition coefficients,
                array[0..M-1]
    Rep     -   fitting report filled by the internal solver. The original
                notes list the following fields:
                * Rep.TaskRCond     reciprocal of condition number
                * R2                non-adjusted coefficient of determination
                                    (non-weighted)
                * RMSError          rms error on the (X,Y).
                * AvgError          average error on the (X,Y).
                * AvgRelError       average relative error on the non-zero Y
                * MaxError          maximum error
                                    NON-WEIGHTED ERRORS ARE CALCULATED
                * CovPar, ErrPar, ErrCurve, Noise - error estimates for the
                  parameters, for the curve and for the data

NOTE:       since every weight equals 1, all points are treated as having
            the same level of noise.

  -- ALGLIB --
     Copyright 17.08.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitlinear(double[] y,
    double[,] fmatrix,
    int n,
    int m,
    ref int info,
    ref double[] c,
    lsfitreport rep)
{
    // Give the outputs neutral values before any check can fire.
    info = 0;
    c = new double[0];

    alglib.ap.assert(n>=1, "LSFitLinear: N<1!");
    alglib.ap.assert(m>=1, "LSFitLinear: M<1!");
    alglib.ap.assert(alglib.ap.len(y)>=n, "LSFitLinear: length(Y)<N!");
    alglib.ap.assert(apserv.isfinitevector(y, n), "LSFitLinear: Y contains infinite or NaN values!");
    alglib.ap.assert(alglib.ap.rows(fmatrix)>=n, "LSFitLinear: rows(FMatrix)<N!");
    alglib.ap.assert(alglib.ap.cols(fmatrix)>=m, "LSFitLinear: cols(FMatrix)<M!");
    alglib.ap.assert(apserv.apservisfinitematrix(fmatrix, n, m), "LSFitLinear: FMatrix contains infinite or NaN values!");

    // The unweighted problem is the weighted one with all weights set to 1.
    double[] unitWeights = new double[n];
    for(int idx=0; idx<n; idx++)
    {
        unitWeights[idx] = 1;
    }
    lsfitlinearinternal(y, unitWeights, fmatrix, n, m, ref info, ref c, rep);
}
/*************************************************************************
Constrained linear least squares fitting, without weights.

Builds a vector of N unit weights and forwards the task to LSFitLinearWC();
see that function for the description of the remaining parameters.

Info is set to -1, and nothing else is done, when N<1.

  -- ALGLIB --
     Copyright 07.09.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitlinearc(double[] y,
    ref double[,] fmatrix,
    ref double[,] cmatrix,
    int n,
    int m,
    int k,
    ref int info,
    ref double[] c,
    ref lsfitreport rep)
{
    // Private copy of the function values.
    y = (double[])y.Clone();

    // The weights array cannot be built for a non-positive point count.
    if( n<1 )
    {
        info = -1;
        return;
    }

    // The unweighted problem is the weighted one with all weights set to 1.
    double[] unitWeights = new double[n];
    for(int idx=0; idx<n; idx++)
    {
        unitWeights[idx] = 1;
    }
    lsfitlinearwc(y, ref unitWeights, ref fmatrix, cmatrix, n, m, k, ref info, ref c, ref rep);
}
/*************************************************************************
Constrained linear least squares fitting (wrapper with out-parameters).

Creates fresh output objects and delegates to lsfit.lsfitlinearc(),
passing the inner object of the report. The description below is the one
carried by the original notes: the task min|A*x=b| is solved given that K
additional constraints C*x=bc are satisfied, by reducing it to a modified
task min|B*y-d| WITHOUT constraints.

INPUT PARAMETERS:
    Y       -   array[0..N-1], function values in N points.
    FMatrix -   table of basis function values, array[0..N-1, 0..M-1].
                FMatrix[I,J] is the value of the J-th basis function in
                the I-th point.
    CMatrix -   table of constraints, array[0..K-1,0..M].
                I-th row of CMatrix corresponds to I-th linear constraint:
                CMatrix[I,0]*C[0] + ... + CMatrix[I,M-1]*C[M-1] = CMatrix[I,M]
    N       -   number of points used, N>=1.
    M       -   number of basis functions, M>=1.
    K       -   number of constraints, 0 <= K < M.
                K=0 corresponds to absence of constraints.

OUTPUT PARAMETERS:
    Info    -   initialized to 0, then set by the underlying solver.
                The original notes list:
                * -4    internal SVD decomposition subroutine failed (very
                        rare and for degenerate systems only)
                * -3    either   too   many  constraints  (M   or   more),
                        degenerate  constraints   (some   constraints  are
                        repeated twice) or inconsistent  constraints  were
                        specified.
                *  1    task is solved
    C       -   initialized to an empty array, then filled with
                decomposition coefficients, array[0..M-1]
    Rep     -   newly created fitting report. The original notes list the
                following fields:
                * RMSError          rms error on the (X,Y).
                * AvgError          average error on the (X,Y).
                * AvgRelError       average relative error on the non-zero Y
                * MaxError          maximum error
                                    NON-WEIGHTED ERRORS ARE CALCULATED

IMPORTANT:
    this subroutine doesn't calculate task's condition number for K<>0.

  -- ALGLIB --
     Copyright 07.09.2009 by Bochkanov Sergey
*************************************************************************/
public static void lsfitlinearc(double[] y,
    double[,] fmatrix,
    double[,] cmatrix,
    int n,
    int m,
    int k,
    out int info,
    out double[] c,
    out lsfitreport rep)
{
    // Out-parameters must be assigned before they can be passed by reference.
    info = 0;
    c = new double[0];
    rep = new lsfitreport();

    lsfit.lsfitlinearc(y, fmatrix, cmatrix, n, m, k, ref info, ref c, rep.innerobj);
}