Пример #1
0
    /*************************************************************************
    *  Linear regression
    *
    *  Subroutine builds model:
    *
    *   Y = A(0)*X[0] + ... + A(N-1)*X[N-1] + A(N)
    *
    *  and returns the model in ALGLIB format, the covariance matrix, training set
    *  errors (rms, average, average relative) and a leave-one-out cross-validation
    *  estimate of the generalization error. The CV estimate is calculated using a
    *  fast algorithm with O(NPoints*NVars) complexity.
    *
    *  When the covariance matrix is calculated, standard deviations of function
    *  values are assumed to be equal to the RMS error on the training set.
    *
    *  INPUT PARAMETERS:
    *   XY          -   training set, array [0..NPoints-1,0..NVars]:
    *                   * NVars columns - independent variables
    *                   * last column - dependent variable
    *   NPoints     -   training set size, NPoints>NVars+1
    *   NVars       -   number of independent variables
    *
    *  OUTPUT PARAMETERS:
    *   Info        -   return code:
    *                   * -255, in case of unknown internal error
    *                   * -4, if internal SVD subroutine has not converged
    *                   * -1, if incorrect parameters were passed (NPoints<NVars+2, NVars<1).
    *                   *  1, if subroutine successfully finished
    *   LM          -   linear model in the ALGLIB format. Use subroutines of
    *                   this unit to work with the model.
    *   AR          -   additional results
    *
    *
    *  -- ALGLIB --
    *    Copyright 02.08.2008 by Bochkanov Sergey
    *************************************************************************/
    public static void lrbuild(ref double[,] xy,
                               int npoints,
                               int nvars,
                               ref int info,
                               ref linearmodel lm,
                               ref lrreport ar)
    {
        //
        // Unweighted front end: every point gets a standard deviation of 1,
        // the weighted builder (lrbuilds) does the actual fit, and afterwards
        // the covariance matrix is rescaled using the RMS error measured on
        // the training set.
        //

        //
        // Parameter check: more than NVars+1 points and at least one variable
        // are required; otherwise Info is set to -1 and nothing else is touched.
        //
        bool sizesAccepted = npoints > nvars + 1 && nvars >= 1;
        if (!sizesAccepted)
        {
            info = -1;
            return;
        }

        //
        // Unit standard deviations, one per training point
        //
        double[] unitSigmas = new double[npoints];
        for (int row = 0; row < npoints; row++)
        {
            unitSigmas[row] = 1;
        }

        lrbuilds(ref xy, ref unitSigmas, npoints, nvars, ref info, ref lm, ref ar);
        if (info < 0)
        {
            // Negative code reported by lrbuilds is handed back unchanged
            return;
        }

        //
        // Rescale the covariance entries with indices 0..NVars in both
        // dimensions by rmserror^2 * NPoints / (NPoints - NVars - 1).
        //
        double varianceScale = AP.Math.Sqr(ar.rmserror) * npoints / (npoints - nvars - 1);
        int dim = nvars + 1;
        for (int row = 0; row < dim; row++)
        {
            for (int col = 0; col < dim; col++)
            {
                ar.c[row, col] = varianceScale * ar.c[row, col];
            }
        }
    }
Пример #2
0
        /*************************************************************************
        Internal linear regression subroutine
        *************************************************************************/
        private static void lrinternal(double[,] xy,
            double[] s,
            int npoints,
            int nvars,
            ref int info,
            linearmodel lm,
            lrreport ar)
        {
            //
            // Solves the weighted linear least squares problem for a model
            // without an implicit constant term (callers append their own
            // constant column) using SVD of the design matrix.
            //
            // INPUT:
            //   xy      - rows 0..npoints-1; columns 0..nvars-1 are read as
            //             regressors, column nvars is read as the target.
            //   s       - per-point divisors applied to both the regressors
            //             and the target; every s[i] must be >0.
            //   npoints - number of rows used.
            //   nvars   - number of regressor columns (number of coefficients).
            //
            // OUTPUT:
            //   info    - -1   npoints<nvars or nvars<1
            //             -2   some s[i]<=0
            //             -4   svd.rmatrixsvd reported failure
            //             -255 unexpected internal state (see below)
            //              1   success
            //   lm.w    - 4 header entries followed by nvars coefficients.
            //   ar      - covariance matrix (ar.c), training-set errors,
            //             leave-one-out errors and the list of points skipped
            //             by the leave-one-out formula.
            //
            // NOTE(review): lm and ar are passed by value and filled in place;
            // this relies on linearmodel/lrreport being reference types -
            // confirm against their declarations.
            //
            double[,] a = new double[0,0];
            double[,] u = new double[0,0];
            double[,] vt = new double[0,0];
            double[,] vm = new double[0,0];
            double[,] xym = new double[0,0];
            double[] b = new double[0];
            double[] sv = new double[0];
            double[] t = new double[0];
            double[] svi = new double[0];
            double[] work = new double[0];
            int i = 0;
            int j = 0;
            int k = 0;
            int ncv = 0;
            int na = 0;
            int nacv = 0;
            double r = 0;
            double p = 0;
            double epstol = 0;
            lrreport ar2 = new lrreport();
            int offs = 0;
            linearmodel tlm = new linearmodel();
            int i_ = 0;
            int i1_ = 0;

            info = 0;

            // Multiplier of machine epsilon; used below as the relative
            // threshold for "zero" singular values and for 1-p in the
            // leave-one-out formula.
            epstol = 1000;
            
            //
            // Check for errors in data
            //
            if( npoints<nvars || nvars<1 )
            {
                info = -1;
                return;
            }
            for(i=0; i<=npoints-1; i++)
            {
                if( (double)(s[i])<=(double)(0) )
                {
                    info = -2;
                    return;
                }
            }
            info = 1;
            
            //
            // Create design matrix: each row of A and each element of b is
            // the corresponding row of XY divided by s[i].
            //
            a = new double[npoints-1+1, nvars-1+1];
            b = new double[npoints-1+1];
            for(i=0; i<=npoints-1; i++)
            {
                r = 1/s[i];
                for(i_=0; i_<=nvars-1;i_++)
                {
                    a[i,i_] = r*xy[i,i_];
                }
                b[i] = xy[i,nvars]/s[i];
            }
            
            //
            // Allocate W:
            // W[0]     array size
            // W[1]     version number (lrvnum)
            // W[2]     NVars (minus 1, to be compatible with external representation)
            // W[3]     coefficients offset
            // W[4..]   NVars coefficients, written further below
            //
            lm.w = new double[4+nvars-1+1];
            offs = 4;
            lm.w[0] = 4+nvars;
            lm.w[1] = lrvnum;
            lm.w[2] = nvars-1;
            lm.w[3] = offs;
            
            //
            // Solve problem using SVD:
            //
            // 0. check for degeneracy (different types)
            // 1. A = U*diag(sv)*V'
            // 2. T = b'*U
            // 3. w = SUM((T[i]/sv[i])*V[..,i])
            // 4. cov(wi,wj) = SUM(V[i,k]*V[j,k]/sv[k]^2, k=0..NVars-1)
            //
            // see $15.4 of "Numerical Recipes in C" for more information
            //
            // NOTE(review): the literal arguments (1, 1, 2) passed to
            // svd.rmatrixsvd presumably select which of U/VT are computed and
            // a memory mode - confirm against that routine. The code below
            // also assumes sv[] is sorted in descending order (sv[0] is
            // treated as the largest value, sv[nvars-1] as the smallest).
            //
            t = new double[nvars-1+1];
            svi = new double[nvars-1+1];
            ar.c = new double[nvars-1+1, nvars-1+1];
            vm = new double[nvars-1+1, nvars-1+1];
            if( !svd.rmatrixsvd(a, npoints, nvars, 1, 1, 2, ref sv, ref u, ref vt) )
            {
                info = -4;
                return;
            }
            if( (double)(sv[0])<=(double)(0) )
            {
                
                //
                // Degenerate case: zero design matrix.
                //
                // All coefficients are set to zero, cross-validation errors
                // are reported equal to the training-set errors and the
                // covariance matrix is zeroed. Info stays 1.
                //
                for(i=offs; i<=offs+nvars-1; i++)
                {
                    lm.w[i] = 0;
                }
                ar.rmserror = lrrmserror(lm, xy, npoints);
                ar.avgerror = lravgerror(lm, xy, npoints);
                ar.avgrelerror = lravgrelerror(lm, xy, npoints);
                ar.cvrmserror = ar.rmserror;
                ar.cvavgerror = ar.avgerror;
                ar.cvavgrelerror = ar.avgrelerror;
                ar.ncvdefects = 0;
                ar.cvdefects = new int[nvars-1+1];
                ar.c = new double[nvars-1+1, nvars-1+1];
                for(i=0; i<=nvars-1; i++)
                {
                    for(j=0; j<=nvars-1; j++)
                    {
                        ar.c[i,j] = 0;
                    }
                }
                return;
            }
            if( (double)(sv[nvars-1])<=(double)(epstol*math.machineepsilon*sv[0]) )
            {
                
                //
                // Degenerate case, non-zero design matrix.
                //
                // We can leave it and solve task in SVD least squares fashion.
                // Solution and covariance matrix will be obtained correctly,
                // but CV error estimates - will not. It is better to reduce
                // it to non-degenerate task and to obtain correct CV estimates.
                //
                // K is searched downwards from NVars: the first K whose
                // singular value sv[K-1] exceeds the threshold gives the
                // number of directions kept in the reduced task.
                //
                for(k=nvars; k>=1; k--)
                {
                    if( (double)(sv[k-1])>(double)(epstol*math.machineepsilon*sv[0]) )
                    {
                        
                        //
                        // Reduce: project every (unweighted) row of XY onto
                        // the first K rows of VT; the target column is copied
                        // as is into column K.
                        //
                        xym = new double[npoints-1+1, k+1];
                        for(i=0; i<=npoints-1; i++)
                        {
                            for(j=0; j<=k-1; j++)
                            {
                                r = 0.0;
                                for(i_=0; i_<=nvars-1;i_++)
                                {
                                    r += xy[i,i_]*vt[j,i_];
                                }
                                xym[i,j] = r;
                            }
                            xym[i,k] = xy[i,nvars];
                        }
                        
                        //
                        // Solve the reduced K-variable task recursively
                        // (weights S are applied inside the recursive call).
                        //
                        lrinternal(xym, s, npoints, k, ref info, tlm, ar2);
                        if( info!=1 )
                        {
                            return;
                        }
                        
                        //
                        // Convert back to un-reduced format:
                        // w = SUM(tlm.w[j]*VT[j,..], j=0..K-1)
                        //
                        for(j=0; j<=nvars-1; j++)
                        {
                            lm.w[offs+j] = 0;
                        }
                        for(j=0; j<=k-1; j++)
                        {
                            r = tlm.w[offs+j];
                            i1_ = (0) - (offs);
                            for(i_=offs; i_<=offs+nvars-1;i_++)
                            {
                                lm.w[i_] = lm.w[i_] + r*vt[j,i_+i1_];
                            }
                        }
                        ar.rmserror = ar2.rmserror;
                        ar.avgerror = ar2.avgerror;
                        ar.avgrelerror = ar2.avgrelerror;
                        ar.cvrmserror = ar2.cvrmserror;
                        ar.cvavgerror = ar2.cvavgerror;
                        ar.cvavgrelerror = ar2.cvavgrelerror;
                        ar.ncvdefects = ar2.ncvdefects;
                        ar.cvdefects = new int[nvars-1+1];
                        for(j=0; j<=ar.ncvdefects-1; j++)
                        {
                            ar.cvdefects[j] = ar2.cvdefects[j];
                        }
                        ar.c = new double[nvars-1+1, nvars-1+1];
                        work = new double[nvars+1];
                        // NOTE(review): presumably the two calls below compute
                        // VM = C2*VT[0..K-1,..] and then C = VT[0..K-1,..]'*VM,
                        // i.e. map the reduced covariance back to the original
                        // variables; the boolean arguments look like transpose
                        // flags and WORK like a scratch buffer - confirm
                        // against blas.matrixmatrixmultiply.
                        blas.matrixmatrixmultiply(ar2.c, 0, k-1, 0, k-1, false, vt, 0, k-1, 0, nvars-1, false, 1.0, ref vm, 0, k-1, 0, nvars-1, 0.0, ref work);
                        blas.matrixmatrixmultiply(vt, 0, k-1, 0, nvars-1, true, vm, 0, k-1, 0, nvars-1, false, 1.0, ref ar.c, 0, nvars-1, 0, nvars-1, 0.0, ref work);
                        return;
                    }
                }
                // Reached only if no singular value exceeds the threshold,
                // which should not happen here because sv[0]>0 was checked
                // above; reported as an unknown internal error.
                info = -255;
                return;
            }
            //
            // Non-degenerate case.
            //
            // svi[] holds reciprocals of the singular values (zero for
            // values at or below the threshold).
            //
            for(i=0; i<=nvars-1; i++)
            {
                if( (double)(sv[i])>(double)(epstol*math.machineepsilon*sv[0]) )
                {
                    svi[i] = 1/sv[i];
                }
                else
                {
                    svi[i] = 0;
                }
            }
            // T = b'*U
            for(i=0; i<=nvars-1; i++)
            {
                t[i] = 0;
            }
            for(i=0; i<=npoints-1; i++)
            {
                r = b[i];
                for(i_=0; i_<=nvars-1;i_++)
                {
                    t[i_] = t[i_] + r*u[i,i_];
                }
            }
            // w = SUM((T[i]*svi[i])*VT[i,..])
            for(i=0; i<=nvars-1; i++)
            {
                lm.w[offs+i] = 0;
            }
            for(i=0; i<=nvars-1; i++)
            {
                r = t[i]*svi[i];
                i1_ = (0) - (offs);
                for(i_=offs; i_<=offs+nvars-1;i_++)
                {
                    lm.w[i_] = lm.w[i_] + r*vt[i,i_+i1_];
                }
            }
            // VM[..,j] = svi[j]*VT[j,..], i.e. columns of V scaled by 1/sv[j]
            for(j=0; j<=nvars-1; j++)
            {
                r = svi[j];
                for(i_=0; i_<=nvars-1;i_++)
                {
                    vm[i_,j] = r*vt[j,i_];
                }
            }
            // C = VM*VM'; only the upper triangle is computed, the lower one
            // is filled by symmetry.
            for(i=0; i<=nvars-1; i++)
            {
                for(j=i; j<=nvars-1; j++)
                {
                    r = 0.0;
                    for(i_=0; i_<=nvars-1;i_++)
                    {
                        r += vm[i,i_]*vm[j,i_];
                    }
                    ar.c[i,j] = r;
                    ar.c[j,i] = r;
                }
            }
            
            //
            // Leave-1-out cross-validation error.
            //
            // NOTATIONS:
            // A            design matrix
            // A*x = b      original linear least squares task
            // U*S*V'       SVD of A
            // ai           i-th row of the A
            // bi           i-th element of the b
            // xf           solution of the original LLS task
            //
            // Cross-validation error of i-th element from a sample is
            // calculated using following formula:
            //
            //     ERRi = ai*xf - (ai*xf-bi*(ui*ui'))/(1-ui*ui')     (1)
            //
            // This formula can be derived from normal equations of the
            // original task
            //
            //     (A'*A)x = A'*b                                    (2)
            //
            // by applying modification (zeroing out i-th row of A) to (2):
            //
            //     (A-ai)'*(A-ai)x = (A-ai)'*b
            //
            // and using Sherman-Morrison formula for updating matrix inverse
            //
            // NOTE 1: b is not zeroed out since it is much simpler and
            // does not influence final result.
            //
            // NOTE 2: some design matrices A have such ui that 1-ui*ui'=0.
            // Formula (1) can't be applied for such cases and they are skipped
            // from CV calculation (which distorts resulting CV estimate).
            // But from the properties of U we can conclude that there can
            // be no more than NVars such vectors. Usually
            // NVars << NPoints, so in a normal case it only slightly
            // influences result.
            //
            // Counters: NCV  - points used in CV sums,
            //           NA   - points with non-zero target (training sums),
            //           NACV - points with non-zero target (CV sums).
            //
            ncv = 0;
            na = 0;
            nacv = 0;
            ar.rmserror = 0;
            ar.avgerror = 0;
            ar.avgrelerror = 0;
            ar.cvrmserror = 0;
            ar.cvavgerror = 0;
            ar.cvavgrelerror = 0;
            ar.ncvdefects = 0;
            ar.cvdefects = new int[nvars-1+1];
            for(i=0; i<=npoints-1; i++)
            {
                
                //
                // Error on a training set (computed on unweighted XY);
                // relative error is accumulated only for non-zero targets.
                //
                i1_ = (offs)-(0);
                r = 0.0;
                for(i_=0; i_<=nvars-1;i_++)
                {
                    r += xy[i,i_]*lm.w[i_+i1_];
                }
                ar.rmserror = ar.rmserror+math.sqr(r-xy[i,nvars]);
                ar.avgerror = ar.avgerror+Math.Abs(r-xy[i,nvars]);
                if( (double)(xy[i,nvars])!=(double)(0) )
                {
                    ar.avgrelerror = ar.avgrelerror+Math.Abs((r-xy[i,nvars])/xy[i,nvars]);
                    na = na+1;
                }
                
                //
                // Error using fast leave-one-out cross-validation
                //
                // P = ui*ui'. Points with P above 1-epstol*eps are recorded
                // in CVDefects and excluded from the CV sums.
                //
                p = 0.0;
                for(i_=0; i_<=nvars-1;i_++)
                {
                    p += u[i,i_]*u[i,i_];
                }
                if( (double)(p)>(double)(1-epstol*math.machineepsilon) )
                {
                    ar.cvdefects[ar.ncvdefects] = i;
                    ar.ncvdefects = ar.ncvdefects+1;
                    continue;
                }
                // R becomes the leave-one-out prediction for point I:
                // the prediction is moved to weighted units (r/s[i]),
                // corrected as (ai*xf-bi*P)/(1-P), and scaled back by s[i].
                r = s[i]*(r/s[i]-b[i]*p)/(1-p);
                ar.cvrmserror = ar.cvrmserror+math.sqr(r-xy[i,nvars]);
                ar.cvavgerror = ar.cvavgerror+Math.Abs(r-xy[i,nvars]);
                if( (double)(xy[i,nvars])!=(double)(0) )
                {
                    ar.cvavgrelerror = ar.cvavgrelerror+Math.Abs((r-xy[i,nvars])/xy[i,nvars]);
                    nacv = nacv+1;
                }
                ncv = ncv+1;
            }
            if( ncv==0 )
            {
                
                //
                // Something strange: ALL ui are degenerate.
                // Unexpected...
                //
                info = -255;
                return;
            }
            // Turn accumulated sums into RMS / mean values. Relative errors
            // are left at zero when no point had a non-zero target.
            ar.rmserror = Math.Sqrt(ar.rmserror/npoints);
            ar.avgerror = ar.avgerror/npoints;
            if( na!=0 )
            {
                ar.avgrelerror = ar.avgrelerror/na;
            }
            ar.cvrmserror = Math.Sqrt(ar.cvrmserror/ncv);
            ar.cvavgerror = ar.cvavgerror/ncv;
            if( nacv!=0 )
            {
                ar.cvavgrelerror = ar.cvavgrelerror/nacv;
            }
        }
Пример #3
0
        /*************************************************************************
        Like LRBuild but builds model

            Y = A(0)*X[0] + ... + A(N-1)*X[N-1]

        i.e. with zero constant term.

          -- ALGLIB --
             Copyright 30.10.2008 by Bochkanov Sergey
        *************************************************************************/
        public static void lrbuildz(double[,] xy,
            int npoints,
            int nvars,
            ref int info,
            linearmodel lm,
            lrreport ar)
        {
            //
            // Unweighted front end for the zero-constant-term model: builds a
            // vector of unit standard deviations, delegates to lrbuildzs and
            // then rescales the covariance matrix using the training-set
            // RMS error.
            //
            info = 0;

            //
            // Parameter check: NPoints>NVars+1 and NVars>=1 are required
            //
            bool sizesAccepted = npoints>nvars+1 && nvars>=1;
            if( !sizesAccepted )
            {
                info = -1;
                return;
            }

            //
            // Unit standard deviations, one per training point
            //
            double[] unitSigmas = new double[npoints];
            for(int row=0; row<npoints; row++)
            {
                unitSigmas[row] = 1;
            }

            lrbuildzs(xy, unitSigmas, npoints, nvars, ref info, lm, ar);
            if( info<0 )
            {
                // Negative code reported by lrbuildzs is handed back unchanged
                return;
            }

            //
            // Rescale covariance entries with indices 0..NVars in both
            // dimensions by rmserror^2 * NPoints / (NPoints-NVars-1).
            //
            double varianceScale = math.sqr(ar.rmserror)*npoints/(npoints-nvars-1);
            int dim = nvars+1;
            for(int row=0; row<dim; row++)
            {
                for(int col=0; col<dim; col++)
                {
                    ar.c[row,col] = varianceScale*ar.c[row,col];
                }
            }
        }
Пример #4
0
        /*************************************************************************
        Like LRBuildS, but builds model

            Y = A(0)*X[0] + ... + A(N-1)*X[N-1]

        i.e. with zero constant term.

          -- ALGLIB --
             Copyright 30.10.2008 by Bochkanov Sergey
        *************************************************************************/
        public static void lrbuildzs(double[,] xy,
            double[] s,
            int npoints,
            int nvars,
            ref int info,
            linearmodel lm,
            lrreport ar)
        {
            //
            // Weighted regression without a constant term.
            //
            // The data is copied into a work matrix with an extra all-zero
            // column in the constant-term position, each regressor column is
            // divided by a scale factor, lrinternal solves the scaled task,
            // and finally coefficients and covariance are scaled back.
            //
            info = 0;

            //
            // Parameter check: NPoints>NVars+1 and NVars>=1 are required
            //
            bool sizesAccepted = npoints>nvars+1 && nvars>=1;
            if( !sizesAccepted )
            {
                info = -1;
                return;
            }

            //
            // Work matrix: NVars regressors, a zero column where the constant
            // term would be, and the target as the last column.
            //
            double[,] scaled = new double[npoints, nvars+2];
            for(int row=0; row<npoints; row++)
            {
                for(int col=0; col<nvars; col++)
                {
                    scaled[row,col] = xy[row,col];
                }
                scaled[row,nvars] = 0;
                scaled[row,nvars+1] = xy[row,nvars];
            }

            //
            // Column scaling. The divisor is the column mean when |mean|
            // exceeds the standard deviation, otherwise the standard
            // deviation itself (with 1 substituted for a zero variance).
            //
            double[] column = new double[npoints];
            double[] divisor = new double[nvars];
            double mean = 0;
            double variance = 0;
            double skewness = 0;
            double kurtosis = 0;
            for(int col=0; col<nvars; col++)
            {
                for(int row=0; row<npoints; row++)
                {
                    column[row] = xy[row,col];
                }
                basestat.samplemoments(column, npoints, ref mean, ref variance, ref skewness, ref kurtosis);

                bool meanDominates = (double)(Math.Abs(mean))>(double)(Math.Sqrt(variance));
                if( meanDominates )
                {
                    divisor[col] = mean;
                }
                else
                {
                    if( (double)(variance)==(double)(0) )
                    {
                        variance = 1;
                    }
                    divisor[col] = Math.Sqrt(variance);
                }

                for(int row=0; row<npoints; row++)
                {
                    scaled[row,col] /= divisor[col];
                }
            }

            //
            // Solve the scaled task with NVars+1 columns
            //
            lrinternal(scaled, s, npoints, nvars+1, ref info, lm, ar);
            if( info<0 )
            {
                return;
            }

            //
            // Undo the scaling: coefficient J and both the J-th row and the
            // J-th column of the covariance matrix are divided by divisor[J].
            // The row pass must precede the column pass, as in the forward
            // order of operations entry [J,J] ends up divided twice.
            //
            int coefStart = (int)Math.Round(lm.w[3]);
            for(int col=0; col<nvars; col++)
            {
                lm.w[coefStart+col] /= divisor[col];

                double inverse = 1/divisor[col];
                for(int idx=0; idx<=nvars; idx++)
                {
                    ar.c[col,idx] *= inverse;
                }
                for(int idx=0; idx<=nvars; idx++)
                {
                    ar.c[idx,col] *= inverse;
                }
            }
        }
Пример #5
0
        /*************************************************************************
        Linear regression

        Variant of LRBuild which uses vector of standard deviations (errors in
        function values).

        INPUT PARAMETERS:
            XY          -   training set, array [0..NPoints-1,0..NVars]:
                            * NVars columns - independent variables
                            * last column - dependent variable
            S           -   standard deviations (errors in function values)
                            array[0..NPoints-1], S[i]>0.
            NPoints     -   training set size, NPoints>NVars+1
            NVars       -   number of independent variables

        OUTPUT PARAMETERS:
            Info        -   return code:
                            * -255, in case of unknown internal error
                            * -4, if internal SVD subroutine has not converged
                            * -1, if incorrect parameters were passed (NPoints<NVars+2, NVars<1).
                            * -2, if S[I]<=0
                            *  1, if subroutine successfully finished
            LM          -   linear model in the ALGLIB format. Use subroutines of
                            this unit to work with the model.
            AR          -   additional results


          -- ALGLIB --
             Copyright 02.08.2008 by Bochkanov Sergey
        *************************************************************************/
        public static void lrbuilds(double[,] xy,
            double[] s,
            int npoints,
            int nvars,
            ref int info,
            linearmodel lm,
            lrreport ar)
        {
            //
            // Weighted regression with a constant term.
            //
            // The data is copied into a work matrix with an extra column of
            // ones (constant term), every regressor column is standardized
            // to (x-mean)/sigma, lrinternal solves the standardized task, and
            // finally coefficients and covariance are mapped back to the
            // original variables.
            //
            info = 0;

            //
            // Parameter check: NPoints>NVars+1 and NVars>=1 are required
            //
            bool sizesAccepted = npoints>nvars+1 && nvars>=1;
            if( !sizesAccepted )
            {
                info = -1;
                return;
            }

            //
            // Work matrix: NVars regressors, a column of ones for the
            // constant term, and the target as the last column.
            //
            double[,] standardized = new double[npoints, nvars+2];
            for(int row=0; row<npoints; row++)
            {
                for(int col=0; col<nvars; col++)
                {
                    standardized[row,col] = xy[row,col];
                }
                standardized[row,nvars] = 1;
                standardized[row,nvars+1] = xy[row,nvars];
            }

            //
            // Standardization of every regressor column; a zero sigma is
            // replaced by 1 so that constant columns do not cause division
            // by zero.
            //
            double[] column = new double[npoints];
            double[] means = new double[nvars];
            double[] sigmas = new double[nvars];
            double mean = 0;
            double variance = 0;
            double skewness = 0;
            double kurtosis = 0;
            for(int col=0; col<nvars; col++)
            {
                for(int row=0; row<npoints; row++)
                {
                    column[row] = xy[row,col];
                }
                basestat.samplemoments(column, npoints, ref mean, ref variance, ref skewness, ref kurtosis);

                means[col] = mean;
                sigmas[col] = Math.Sqrt(variance);
                if( (double)(sigmas[col])==(double)(0) )
                {
                    sigmas[col] = 1;
                }

                for(int row=0; row<npoints; row++)
                {
                    standardized[row,col] = (standardized[row,col]-means[col])/sigmas[col];
                }
            }

            //
            // Solve the standardized task with NVars+1 columns
            //
            lrinternal(standardized, s, npoints, nvars+1, ref info, lm, ar);
            if( info<0 )
            {
                return;
            }

            //
            // Map the solution back to the original variables, one regressor
            // at a time. The statement order inside the loop matters: the
            // constant-term updates read row/column J of the covariance
            // matrix before they are rescaled, and the column pass reads
            // entries already changed by the row pass.
            //
            int coefStart = (int)Math.Round(lm.w[3]);
            int constPos = nvars;
            for(int col=0; col<nvars; col++)
            {
                //
                // Constant term absorbs the shift by means[col]; its
                // covariance row and column are corrected accordingly.
                //
                lm.w[coefStart+constPos] -= lm.w[coefStart+col]*means[col]/sigmas[col];
                double shift = means[col]/sigmas[col];
                for(int idx=0; idx<=nvars; idx++)
                {
                    ar.c[constPos,idx] -= shift*ar.c[col,idx];
                }
                for(int idx=0; idx<=nvars; idx++)
                {
                    ar.c[idx,constPos] -= shift*ar.c[idx,col];
                }

                //
                // Coefficient of the regressor and its covariance row and
                // column are divided by sigmas[col].
                //
                lm.w[coefStart+col] /= sigmas[col];
                double inverse = 1/sigmas[col];
                for(int idx=0; idx<=nvars; idx++)
                {
                    ar.c[col,idx] *= inverse;
                }
                for(int idx=0; idx<=nvars; idx++)
                {
                    ar.c[idx,col] *= inverse;
                }
            }
        }
Пример #6
0
    /*************************************************************************
    *  Internal linear regression subroutine
    *************************************************************************/
    private static void lrinternal(ref double[,] xy,
                                   ref double[] s,
                                   int npoints,
                                   int nvars,
                                   ref int info,
                                   ref linearmodel lm,
                                   ref lrreport ar)
    {
        double[,] a   = new double[0, 0];
        double[,] u   = new double[0, 0];
        double[,] vt  = new double[0, 0];
        double[,] vm  = new double[0, 0];
        double[,] xym = new double[0, 0];
        double[]    b      = new double[0];
        double[]    sv     = new double[0];
        double[]    t      = new double[0];
        double[]    svi    = new double[0];
        double[]    work   = new double[0];
        int         i      = 0;
        int         j      = 0;
        int         k      = 0;
        int         ncv    = 0;
        int         na     = 0;
        int         nacv   = 0;
        double      r      = 0;
        double      p      = 0;
        double      epstol = 0;
        lrreport    ar2    = new lrreport();
        int         offs   = 0;
        linearmodel tlm    = new linearmodel();
        int         i_     = 0;
        int         i1_    = 0;

        epstol = 1000;

        //
        // Check for errors in data
        //
        if (npoints < nvars | nvars < 1)
        {
            info = -1;
            return;
        }
        for (i = 0; i <= npoints - 1; i++)
        {
            if (s[i] <= 0)
            {
                info = -2;
                return;
            }
        }
        info = 1;

        //
        // Create design matrix
        //
        a = new double[npoints - 1 + 1, nvars - 1 + 1];
        b = new double[npoints - 1 + 1];
        for (i = 0; i <= npoints - 1; i++)
        {
            r = 1 / s[i];
            for (i_ = 0; i_ <= nvars - 1; i_++)
            {
                a[i, i_] = r * xy[i, i_];
            }
            b[i] = xy[i, nvars] / s[i];
        }

        //
        // Allocate W:
        // W[0]     array size
        // W[1]     version number, 0
        // W[2]     NVars (minus 1, to be compatible with external representation)
        // W[3]     coefficients offset
        //
        lm.w    = new double[4 + nvars - 1 + 1];
        offs    = 4;
        lm.w[0] = 4 + nvars;
        lm.w[1] = lrvnum;
        lm.w[2] = nvars - 1;
        lm.w[3] = offs;

        //
        // Solve problem using SVD:
        //
        // 0. check for degeneracy (different types)
        // 1. A = U*diag(sv)*V'
        // 2. T = b'*U
        // 3. w = SUM((T[i]/sv[i])*V[..,i])
        // 4. cov(wi,wj) = SUM(Vji*Vjk/sv[i]^2,K=1..M)
        //
        // see $15.4 of "Numerical Recipes in C" for more information
        //
        t    = new double[nvars - 1 + 1];
        svi  = new double[nvars - 1 + 1];
        ar.c = new double[nvars - 1 + 1, nvars - 1 + 1];
        vm   = new double[nvars - 1 + 1, nvars - 1 + 1];
        if (!svd.rmatrixsvd(a, npoints, nvars, 1, 1, 2, ref sv, ref u, ref vt))
        {
            info = -4;
            return;
        }
        if (sv[0] <= 0)
        {
            //
            // Degenerate case: zero design matrix.
            //
            for (i = offs; i <= offs + nvars - 1; i++)
            {
                lm.w[i] = 0;
            }
            ar.rmserror      = lrrmserror(ref lm, ref xy, npoints);
            ar.avgerror      = lravgerror(ref lm, ref xy, npoints);
            ar.avgrelerror   = lravgrelerror(ref lm, ref xy, npoints);
            ar.cvrmserror    = ar.rmserror;
            ar.cvavgerror    = ar.avgerror;
            ar.cvavgrelerror = ar.avgrelerror;
            ar.ncvdefects    = 0;
            ar.cvdefects     = new int[nvars - 1 + 1];
            ar.c             = new double[nvars - 1 + 1, nvars - 1 + 1];
            for (i = 0; i <= nvars - 1; i++)
            {
                for (j = 0; j <= nvars - 1; j++)
                {
                    ar.c[i, j] = 0;
                }
            }
            return;
        }
        if (sv[nvars - 1] <= epstol * AP.Math.MachineEpsilon * sv[0])
        {
            //
            // Degenerate case, non-zero design matrix.
            //
            // We can leave it and solve task in SVD least squares fashion.
            // Solution and covariance matrix will be obtained correctly,
            // but CV error estimates - will not. It is better to reduce
            // it to non-degenerate task and to obtain correct CV estimates.
            //
            for (k = nvars; k >= 1; k--)
            {
                if (sv[k - 1] > epstol * AP.Math.MachineEpsilon * sv[0])
                {
                    //
                    // Reduce
                    //
                    xym = new double[npoints - 1 + 1, k + 1];
                    for (i = 0; i <= npoints - 1; i++)
                    {
                        for (j = 0; j <= k - 1; j++)
                        {
                            r = 0.0;
                            for (i_ = 0; i_ <= nvars - 1; i_++)
                            {
                                r += xy[i, i_] * vt[j, i_];
                            }
                            xym[i, j] = r;
                        }
                        xym[i, k] = xy[i, nvars];
                    }

                    //
                    // Solve
                    //
                    lrinternal(ref xym, ref s, npoints, k, ref info, ref tlm, ref ar2);
                    if (info != 1)
                    {
                        return;
                    }

                    //
                    // Convert back to un-reduced format
                    //
                    for (j = 0; j <= nvars - 1; j++)
                    {
                        lm.w[offs + j] = 0;
                    }
                    for (j = 0; j <= k - 1; j++)
                    {
                        r   = tlm.w[offs + j];
                        i1_ = (0) - (offs);
                        for (i_ = offs; i_ <= offs + nvars - 1; i_++)
                        {
                            lm.w[i_] = lm.w[i_] + r * vt[j, i_ + i1_];
                        }
                    }
                    ar.rmserror      = ar2.rmserror;
                    ar.avgerror      = ar2.avgerror;
                    ar.avgrelerror   = ar2.avgrelerror;
                    ar.cvrmserror    = ar2.cvrmserror;
                    ar.cvavgerror    = ar2.cvavgerror;
                    ar.cvavgrelerror = ar2.cvavgrelerror;
                    ar.ncvdefects    = ar2.ncvdefects;
                    ar.cvdefects     = new int[nvars - 1 + 1];
                    for (j = 0; j <= ar.ncvdefects - 1; j++)
                    {
                        ar.cvdefects[j] = ar2.cvdefects[j];
                    }
                    ar.c = new double[nvars - 1 + 1, nvars - 1 + 1];
                    work = new double[nvars + 1];
                    blas.matrixmatrixmultiply(ref ar2.c, 0, k - 1, 0, k - 1, false, ref vt, 0, k - 1, 0, nvars - 1, false, 1.0, ref vm, 0, k - 1, 0, nvars - 1, 0.0, ref work);
                    blas.matrixmatrixmultiply(ref vt, 0, k - 1, 0, nvars - 1, true, ref vm, 0, k - 1, 0, nvars - 1, false, 1.0, ref ar.c, 0, nvars - 1, 0, nvars - 1, 0.0, ref work);
                    return;
                }
            }
            info = -255;
            return;
        }
        for (i = 0; i <= nvars - 1; i++)
        {
            if (sv[i] > epstol * AP.Math.MachineEpsilon * sv[0])
            {
                svi[i] = 1 / sv[i];
            }
            else
            {
                svi[i] = 0;
            }
        }
        for (i = 0; i <= nvars - 1; i++)
        {
            t[i] = 0;
        }
        for (i = 0; i <= npoints - 1; i++)
        {
            r = b[i];
            for (i_ = 0; i_ <= nvars - 1; i_++)
            {
                t[i_] = t[i_] + r * u[i, i_];
            }
        }
        for (i = 0; i <= nvars - 1; i++)
        {
            lm.w[offs + i] = 0;
        }
        for (i = 0; i <= nvars - 1; i++)
        {
            r   = t[i] * svi[i];
            i1_ = (0) - (offs);
            for (i_ = offs; i_ <= offs + nvars - 1; i_++)
            {
                lm.w[i_] = lm.w[i_] + r * vt[i, i_ + i1_];
            }
        }
        for (j = 0; j <= nvars - 1; j++)
        {
            r = svi[j];
            for (i_ = 0; i_ <= nvars - 1; i_++)
            {
                vm[i_, j] = r * vt[j, i_];
            }
        }
        for (i = 0; i <= nvars - 1; i++)
        {
            for (j = i; j <= nvars - 1; j++)
            {
                r = 0.0;
                for (i_ = 0; i_ <= nvars - 1; i_++)
                {
                    r += vm[i, i_] * vm[j, i_];
                }
                ar.c[i, j] = r;
                ar.c[j, i] = r;
            }
        }

        //
        // Leave-1-out cross-validation error.
        //
        // NOTATIONS:
        // A            design matrix
        // A*x = b      original linear least squares task
        // U*S*V'       SVD of A
        // ai           i-th row of the A
        // bi           i-th element of the b
        // xf           solution of the original LLS task
        //
        // Cross-validation error of i-th element from a sample is
        // calculated using following formula:
        //
        //     ERRi = ai*xf - (ai*xf-bi*(ui*ui'))/(1-ui*ui')     (1)
        //
        // This formula can be derived from normal equations of the
        // original task
        //
        //     (A'*A)x = A'*b                                    (2)
        //
        // by applying modification (zeroing out i-th row of A) to (2):
        //
        //     ((A-ai)'*(A-ai))x = (A-ai)'*b
        //
        // and using Sherman-Morrison formula for updating matrix inverse
        //
        // NOTE 1: b is not zeroed out since it is much simpler and
        // does not influence final result.
        //
        // NOTE 2: some design matrices A have such ui that 1-ui*ui'=0.
        // Formula (1) can't be applied for such cases and they are skipped
        // from CV calculation (which distorts resulting CV estimate).
        // But from the properties of U we can conclude that there can
        // be no more than NVars such vectors. Usually
        // NVars << NPoints, so in a normal case it only slightly
        // influences result.
        //
        ncv              = 0;
        na               = 0;
        nacv             = 0;
        ar.rmserror      = 0;
        ar.avgerror      = 0;
        ar.avgrelerror   = 0;
        ar.cvrmserror    = 0;
        ar.cvavgerror    = 0;
        ar.cvavgrelerror = 0;
        ar.ncvdefects    = 0;
        ar.cvdefects     = new int[nvars - 1 + 1];
        for (i = 0; i <= npoints - 1; i++)
        {
            //
            // Error on a training set
            //
            i1_ = (offs) - (0);
            r   = 0.0;
            for (i_ = 0; i_ <= nvars - 1; i_++)
            {
                r += xy[i, i_] * lm.w[i_ + i1_];
            }
            ar.rmserror = ar.rmserror + AP.Math.Sqr(r - xy[i, nvars]);
            ar.avgerror = ar.avgerror + Math.Abs(r - xy[i, nvars]);
            if (xy[i, nvars] != 0)
            {
                ar.avgrelerror = ar.avgrelerror + Math.Abs((r - xy[i, nvars]) / xy[i, nvars]);
                na             = na + 1;
            }

            //
            // Error using fast leave-one-out cross-validation
            //
            p = 0.0;
            for (i_ = 0; i_ <= nvars - 1; i_++)
            {
                p += u[i, i_] * u[i, i_];
            }
            if (p > 1 - epstol * AP.Math.MachineEpsilon)
            {
                ar.cvdefects[ar.ncvdefects] = i;
                ar.ncvdefects = ar.ncvdefects + 1;
                continue;
            }
            r             = s[i] * (r / s[i] - b[i] * p) / (1 - p);
            ar.cvrmserror = ar.cvrmserror + AP.Math.Sqr(r - xy[i, nvars]);
            ar.cvavgerror = ar.cvavgerror + Math.Abs(r - xy[i, nvars]);
            if (xy[i, nvars] != 0)
            {
                ar.cvavgrelerror = ar.cvavgrelerror + Math.Abs((r - xy[i, nvars]) / xy[i, nvars]);
                nacv             = nacv + 1;
            }
            ncv = ncv + 1;
        }
        if (ncv == 0)
        {
            //
            // Something strange: ALL ui are degenerate.
            // Unexpected...
            //
            info = -255;
            return;
        }
        ar.rmserror = Math.Sqrt(ar.rmserror / npoints);
        ar.avgerror = ar.avgerror / npoints;
        if (na != 0)
        {
            ar.avgrelerror = ar.avgrelerror / na;
        }
        ar.cvrmserror = Math.Sqrt(ar.cvrmserror / ncv);
        ar.cvavgerror = ar.cvavgerror / ncv;
        if (nacv != 0)
        {
            ar.cvavgrelerror = ar.cvavgrelerror / nacv;
        }
    }
Пример #7
0
    /*************************************************************************
    *  Linear regression with zero constant term
    *
    *  Counterpart of LRBuildS which builds the model
    *
    *   Y = A(0)*X[0] + ... + A(N-1)*X[N-1]
    *
    *  The constant-term column handed to the internal solver is filled with
    *  zeros. Before fitting, every independent variable is divided by a scale
    *  factor: its mean when |mean| > sqrt(variance), otherwise sqrt(variance)
    *  (with 1 substituted for a zero variance). After fitting, coefficients
    *  and the matrix AR.C are converted back to the original units.
    *
    *  INPUT PARAMETERS:
    *   XY          -   training set, read as array [0..NPoints-1,0..NVars]:
    *                   NVars columns of independent variables followed by
    *                   the dependent variable
    *   S           -   forwarded unchanged to the internal solver
    *   NPoints     -   training set size, must exceed NVars+1
    *   NVars       -   number of independent variables, at least 1
    *
    *  OUTPUT PARAMETERS:
    *   Info        -   -1 if NPoints<=NVars+1 or NVars<1; otherwise the code
    *                   set by the internal solver (on a negative code the
    *                   routine returns without rescaling LM and AR)
    *   LM          -   model produced by the internal solver, coefficients
    *                   rescaled to the original units
    *   AR          -   report produced by the internal solver, rows and
    *                   columns of C rescaled to the original units
    *
    *  -- ALGLIB --
    *    Copyright 30.10.2008 by Bochkanov Sergey
    *************************************************************************/
    public static void lrbuildzs(ref double[,] xy,
                                 ref double[] s,
                                 int npoints,
                                 int nvars,
                                 ref int info,
                                 ref linearmodel lm,
                                 ref lrreport ar)
    {
        //
        // Reject problems that are too small
        //
        if (nvars < 1 || npoints <= nvars + 1)
        {
            info = -1;
            return;
        }

        //
        // Widened copy of the data set:
        // [ independent variables | zero column (constant term) | dependent variable ]
        //
        int depcol = nvars + 1;
        double[,] widened = new double[npoints, nvars + 2];
        for (int row = 0; row < npoints; row++)
        {
            for (int col = 0; col < nvars; col++)
            {
                widened[row, col] = xy[row, col];
            }
            widened[row, nvars]  = 0;
            widened[row, depcol] = xy[row, nvars];
        }

        //
        // Per-variable scaling. No centering is performed here,
        // each column is only divided by its scale factor.
        //
        double[] column   = new double[npoints];
        double[] scale    = new double[nvars];
        double   mean     = 0;
        double   variance = 0;
        double   skewness = 0;
        double   kurtosis = 0;
        for (int col = 0; col < nvars; col++)
        {
            for (int row = 0; row < npoints; row++)
            {
                column[row] = xy[row, col];
            }
            descriptivestatistics.calculatemoments(ref column, npoints, ref mean, ref variance, ref skewness, ref kurtosis);
            if (Math.Abs(mean) > Math.Sqrt(variance))
            {
                //
                // Spread is small relative to the mean: bring the mean to 1
                //
                scale[col] = mean;
            }
            else
            {
                //
                // Spread dominates: bring the variance to 1
                // (a constant zero column keeps its values, scale is 1)
                //
                if (variance == 0)
                {
                    variance = 1;
                }
                scale[col] = Math.Sqrt(variance);
            }
            double divisor = scale[col];
            for (int row = 0; row < npoints; row++)
            {
                widened[row, col] /= divisor;
            }
        }

        //
        // Fit the scaled problem
        //
        lrinternal(ref widened, ref s, npoints, nvars + 1, ref info, ref lm, ref ar);
        if (info < 0)
        {
            return;
        }

        //
        // Undo the scaling. The position of the first coefficient
        // inside lm.w is read from lm.w[3].
        //
        int first = (int)Math.Round(lm.w[3]);
        for (int col = 0; col < nvars; col++)
        {
            double denom = scale[col];
            lm.w[first + col] = lm.w[first + col] / denom;

            //
            // Row first, then column: the diagonal element is scaled twice
            //
            double factor = 1 / denom;
            for (int k = 0; k <= nvars; k++)
            {
                ar.c[col, k] = factor * ar.c[col, k];
            }
            for (int k = 0; k <= nvars; k++)
            {
                ar.c[k, col] = factor * ar.c[k, col];
            }
        }
    }
Пример #8
0
    /*************************************************************************
    *  Linear regression
    *
    *  Variant of LRBuild which uses a vector of standard deviations (errors
    *  in function values).
    *
    *  Every independent variable is centered by its mean and divided by its
    *  standard deviation (1 is used when the deviation is zero) before the
    *  internal solver is called; afterwards the coefficients, the constant
    *  term and the matrix AR.C are converted back to the original units.
    *
    *  INPUT PARAMETERS:
    *   XY          -   training set, array [0..NPoints-1,0..NVars]:
    *                   NVars columns - independent variables
    *                   last column - dependent variable
    *   S           -   standard deviations (errors in function values)
    *                   array[0..NPoints-1], S[i]>0.
    *   NPoints     -   training set size, NPoints>NVars+1
    *   NVars       -   number of independent variables
    *
    *  OUTPUT PARAMETERS:
    *   Info        -   return code (all codes except -1 are set by the
    *                   internal solver):
    *                   -255, in case of unknown internal error
    *                   -4, if internal SVD subroutine hasn't converged
    *                   -1, if incorrect parameters were passed
    *                       (NPoints<NVars+2, NVars<1)
    *                   -2, if S[I]<=0
    *                    1, if subroutine successfully finished
    *   LM          -   linear model in the ALGLIB format. Use subroutines of
    *                   this unit to work with the model.
    *   AR          -   additional results
    *
    *
    *  -- ALGLIB --
    *    Copyright 02.08.2008 by Bochkanov Sergey
    *************************************************************************/
    public static void lrbuilds(ref double[,] xy,
                                ref double[] s,
                                int npoints,
                                int nvars,
                                ref int info,
                                ref linearmodel lm,
                                ref lrreport ar)
    {
        //
        // Reject problems that are too small
        //
        if (nvars < 1 || npoints <= nvars + 1)
        {
            info = -1;
            return;
        }

        //
        // Widened copy of the data set:
        // [ independent variables | column of ones (constant term) | dependent variable ]
        //
        int depcol = nvars + 1;
        double[,] widened = new double[npoints, nvars + 2];
        for (int row = 0; row < npoints; row++)
        {
            for (int col = 0; col < nvars; col++)
            {
                widened[row, col] = xy[row, col];
            }
            widened[row, nvars]  = 1;
            widened[row, depcol] = xy[row, nvars];
        }

        //
        // Standardization: subtract the mean, divide by the deviation
        //
        double[] column   = new double[npoints];
        double[] centers  = new double[nvars];
        double[] spreads  = new double[nvars];
        double   mean     = 0;
        double   variance = 0;
        double   skewness = 0;
        double   kurtosis = 0;
        for (int col = 0; col < nvars; col++)
        {
            for (int row = 0; row < npoints; row++)
            {
                column[row] = xy[row, col];
            }
            descriptivestatistics.calculatemoments(ref column, npoints, ref mean, ref variance, ref skewness, ref kurtosis);

            //
            // A constant column has zero deviation; 1 is used instead
            // so that the division below stays defined.
            //
            double spread = Math.Sqrt(variance);
            if (spread == 0)
            {
                spread = 1;
            }
            centers[col] = mean;
            spreads[col] = spread;
            for (int row = 0; row < npoints; row++)
            {
                widened[row, col] = (widened[row, col] - mean) / spread;
            }
        }

        //
        // Fit the standardized problem
        //
        lrinternal(ref widened, ref s, npoints, nvars + 1, ref info, ref lm, ref ar);
        if (info < 0)
        {
            return;
        }

        //
        // Undo the standardization. The position of the first coefficient
        // inside lm.w is read from lm.w[3]; the constant term follows the
        // NVars coefficients.
        //
        int first    = (int)Math.Round(lm.w[3]);
        int constpos = first + nvars;
        for (int col = 0; col < nvars; col++)
        {
            double mu    = centers[col];
            double sigma = spreads[col];

            //
            // Constant term absorbs the shift of the col-th variable,
            // and so do its row and column of the covariance matrix.
            // Must run before the col-th coefficient is rescaled below.
            //
            lm.w[constpos] = lm.w[constpos] - lm.w[first + col] * mu / sigma;
            double shift = mu / sigma;
            for (int k = 0; k <= nvars; k++)
            {
                ar.c[nvars, k] = ar.c[nvars, k] - shift * ar.c[col, k];
            }
            for (int k = 0; k <= nvars; k++)
            {
                ar.c[k, nvars] = ar.c[k, nvars] - shift * ar.c[k, col];
            }

            //
            // col-th coefficient and its row/column are rescaled
            //
            lm.w[first + col] = lm.w[first + col] / sigma;
            double factor = 1 / sigma;
            for (int k = 0; k <= nvars; k++)
            {
                ar.c[col, k] = factor * ar.c[col, k];
            }
            for (int k = 0; k <= nvars; k++)
            {
                ar.c[k, col] = factor * ar.c[k, col];
            }
        }
    }
Пример #9
0
 // Builds a new lrreport carrying the same values as this one.
 // The two arrays are duplicated with Clone(), so the returned report
 // does not share array storage with this instance.
 public override alglib.apobject make_copy()
 {
     lrreport copy = new lrreport();

     // Array members: cloned, not aliased.
     copy.cvdefects = (int[])cvdefects.Clone();
     copy.c = (double[,])c.Clone();

     // Training-set error measures.
     copy.rmserror = rmserror;
     copy.avgerror = avgerror;
     copy.avgrelerror = avgrelerror;

     // Cross-validation error measures and defect count.
     copy.cvrmserror = cvrmserror;
     copy.cvavgerror = cvavgerror;
     copy.cvavgrelerror = cvavgrelerror;
     copy.ncvdefects = ncvdefects;

     return copy;
 }