Code Example #1
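GetLassoFit standardizes the features and the target, runs pathwise coordinate descent over a geometrically decreasing sequence of lambda values, and records the weights, R-squared, and lambda of each solution in a LassoFit object before rescaling everything back to the original units.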
        private LassoFit GetLassoFit(IChannel ch, int maxAllowedFeaturesPerModel)
        {
            Stopwatch stopWatch = Stopwatch.StartNew();

            if (maxAllowedFeaturesPerModel < 0)
            {
                maxAllowedFeaturesPerModel = _numFeatures;
            }
            int numberOfLambdas             = DefaultNumberOfLambdas;
            int maxAllowedFeaturesAlongPath = (int)Math.Min(maxAllowedFeaturesPerModel * 1.2, _numFeatures);

            ch.Info("Lasso Compression uses {0} observations.", _numObservations);

            // lambdaMin = flmin * lambdaMax
            double flmin = (_numObservations < _numFeatures ? 5e-2 : 1e-4);

            /********************************
             * Standardize predictors and target:
             * Center the target and features (mean 0) and normalize their vectors to have the same
             * standard deviation
             */
            double[] featureMeans = new double[_numFeatures];
            double[] featureStds  = new double[_numFeatures];
            double[] feature2residualCorrelations = new double[_numFeatures];

            float factor = (float)(1.0 / Math.Sqrt(_numObservations));

            for (int j = 0; j < _numFeatures; j++)
            {
                double mean = VectorUtils.GetMean(_observations[j]);
                featureMeans[j] = mean;
                unsafe
                {
                    fixed(float *pVector = _observations[j])
                    {
                        for (int i = 0; i < _numObservations; i++)
                        {
                            pVector[i] = (float)(factor * (pVector[i] - mean));
                        }
                    }
                }
                featureStds[j] = Math.Sqrt(VectorUtils.GetDotProduct(_observations[j], _observations[j]));

                VectorUtils.DivideInPlace(_observations[j], (float)featureStds[j]);
            }

            float targetMean = (float)VectorUtils.GetMean(_targets);

            unsafe
            {
                fixed(float *pVector = _targets)
                {
                    for (int i = 0; i < _numObservations; i++)
                    {
                        pVector[i] = factor * (pVector[i] - targetMean);
                    }
                }
            }
            float targetStd = (float)Math.Sqrt(VectorUtils.GetDotProduct(_targets, _targets));

            VectorUtils.DivideInPlace(_targets, targetStd);

            for (int j = 0; j < _numFeatures; j++)
            {
                feature2residualCorrelations[j] = VectorUtils.GetDotProduct(_targets, _observations[j]);
            }

            double[][] feature2featureCorrelations = VectorUtils.AllocateDoubleMatrix(_numFeatures, maxAllowedFeaturesAlongPath);
            double[]   activeWeights           = new double[_numFeatures];
            int[]      correlationCacheIndices = new int[_numFeatures];
            double[]   denseActiveSet          = new double[_numFeatures];

            LassoFit fit = new LassoFit(numberOfLambdas, maxAllowedFeaturesAlongPath, _numFeatures);

            fit.NumberOfLambdas = 0;

            // alf is the geometric decay factor applied to lambda between successive solutions on the path.
            double alf      = Math.Pow(Math.Max(Epsilon, flmin), 1.0 / (numberOfLambdas - 1));
            double rsquared = 0.0;

            fit.NumberOfPasses = 0;
            int numberOfInputs         = 0;
            int minimumNumberOfLambdas = Math.Min(MinNumberOfLambdas, numberOfLambdas);

            double curLambda = 0;
            double maxDelta;

            for (int iteration = 1; iteration <= numberOfLambdas; iteration++)
            {
                ch.Info("Starting iteration {0}: R2={1}", iteration, rsquared);

                /**********
                 * Compute lambda for this round
                 */
                if (iteration == 1)
                {
                    curLambda = Double.MaxValue; // first lambda is infinity
                }
                else if (iteration == 2)
                {
                    curLambda = 0.0;
                    for (int j = 0; j < _numFeatures; j++)
                    {
                        curLambda = Math.Max(curLambda, Math.Abs(feature2residualCorrelations[j]));
                    }
                    curLambda = alf * curLambda;
                }
                else
                {
                    curLambda = curLambda * alf;
                }

                double prevRsq = rsquared;
                double v;
                unsafe
                {
                    fixed(double *pActiveWeights = activeWeights)
                    fixed(double *pFeature2residualCorrelations = feature2residualCorrelations)
                    fixed(int *pIndices = fit.Indices)
                    fixed(int *pCorrelationCacheIndices = correlationCacheIndices)
                    {
                        while (true)
                        {
                            fit.NumberOfPasses++;
                            maxDelta = 0.0;
                            for (int k = 0; k < _numFeatures; k++)
                            {
                                double prevWeight = pActiveWeights[k];
                                double u          = pFeature2residualCorrelations[k] + prevWeight;
                                v = (u >= 0 ? u : -u) - curLambda;
                                // Computes sign(u)(|u| - curLambda)+
                                pActiveWeights[k] = (v > 0 ? (u >= 0 ? v : -v) : 0.0);

                                // Is the weight of this variable changed?
                                // If not, we go to the next one
                                if (pActiveWeights[k] == prevWeight)
                                {
                                    continue;
                                }

                                // If we have not computed the correlations of this
                                // variable with other variables, we do this now and
                                // cache the result
                                if (pCorrelationCacheIndices[k] == 0)
                                {
                                    numberOfInputs++;
                                    if (numberOfInputs > maxAllowedFeaturesAlongPath)
                                    {
                                        // we have reached the maximum
                                        break;
                                    }
                                    for (int j = 0; j < _numFeatures; j++)
                                    {
                                        // if we have already computed correlations for
                                        // the jth variable, we will reuse it here.
                                        if (pCorrelationCacheIndices[j] != 0)
                                        {
                                            feature2featureCorrelations[j][numberOfInputs - 1] = feature2featureCorrelations[k][pCorrelationCacheIndices[j] - 1];
                                        }
                                        else
                                        {
                                            // Correlation of a variable with itself is one
                                            if (j == k)
                                            {
                                                feature2featureCorrelations[j][numberOfInputs - 1] = 1.0;
                                            }
                                            else
                                            {
                                                feature2featureCorrelations[j][numberOfInputs - 1] = VectorUtils.GetDotProduct(_observations[j], _observations[k]);
                                            }
                                        }
                                    }
                                    pCorrelationCacheIndices[k]  = numberOfInputs;
                                    pIndices[numberOfInputs - 1] = k;
                                }

                                // How much is the weight changed?
                                double delta = pActiveWeights[k] - prevWeight;
                                rsquared += delta * (2.0 * pFeature2residualCorrelations[k] - delta);
                                maxDelta  = Math.Max((delta >= 0 ? delta : -delta), maxDelta);

                                for (int j = 0; j < _numFeatures; j++)
                                {
                                    pFeature2residualCorrelations[j] -= feature2featureCorrelations[j][pCorrelationCacheIndices[k] - 1] * delta;
                                }
                            }

                            if (maxDelta < ConvergenceThreshold || numberOfInputs > maxAllowedFeaturesAlongPath)
                            {
                                break;
                            }

                            // Snapshot the active-set weights before iterating on the active set alone.
                            for (int ii = 0; ii < numberOfInputs; ii++)
                            {
                                denseActiveSet[ii] = activeWeights[pIndices[ii]];
                            }

                            // Coordinate descent restricted to the current active set, until convergence.
                            do
                            {
                                fit.NumberOfPasses++;
                                maxDelta = 0.0;
                                for (int l = 0; l < numberOfInputs; l++)
                                {
                                    int    k          = pIndices[l];
                                    double prevWeight = pActiveWeights[k];
                                    double u          = pFeature2residualCorrelations[k] + prevWeight;
                                    v = (u >= 0 ? u : -u) - curLambda;
                                    pActiveWeights[k] = (v > 0 ? (u >= 0 ? v : -v) : 0.0);
                                    if (pActiveWeights[k] == prevWeight)
                                    {
                                        continue;
                                    }
                                    double delta = pActiveWeights[k] - prevWeight;
                                    rsquared += delta * (2.0 * pFeature2residualCorrelations[k] - delta);
                                    maxDelta  = Math.Max((delta >= 0 ? delta : -delta), maxDelta);
                                    for (int j = 0; j < numberOfInputs; j++)
                                    {
                                        pFeature2residualCorrelations[pIndices[j]] -= feature2featureCorrelations[pIndices[j]][pCorrelationCacheIndices[k] - 1] * delta;
                                    }
                                }
                            } while (maxDelta >= ConvergenceThreshold);

                            // denseActiveSet now holds the change in each active weight; use it below to
                            // update the residual correlations of the features outside the active set.
                            for (int ii = 0; ii < numberOfInputs; ii++)
                            {
                                denseActiveSet[ii] = pActiveWeights[pIndices[ii]] - denseActiveSet[ii];
                            }
                            for (int j = 0; j < _numFeatures; j++)
                            {
                                if (pCorrelationCacheIndices[j] == 0)
                                {
                                    pFeature2residualCorrelations[j] -= VectorUtils.GetDotProduct(denseActiveSet, feature2featureCorrelations[j], numberOfInputs);
                                }
                            }
                        }

                        if (numberOfInputs > maxAllowedFeaturesAlongPath)
                        {
                            break;
                        }
                        if (numberOfInputs > 0)
                        {
                            for (int ii = 0; ii < numberOfInputs; ii++)
                            {
                                fit.CompressedWeights[iteration - 1][ii] = pActiveWeights[pIndices[ii]];
                            }
                        }
                        fit.NumberOfWeights[iteration - 1] = numberOfInputs;
                        fit.Rsquared[iteration - 1]        = rsquared;
                        fit.Lambdas[iteration - 1]         = curLambda;
                        fit.NumberOfLambdas = iteration;

                        if (iteration < minimumNumberOfLambdas)
                        {
                            continue;
                        }

                        int me = 0;

                        for (int j = 0; j < numberOfInputs; j++)
                        {
                            if (fit.CompressedWeights[iteration - 1][j] != 0.0)
                            {
                                me++;
                            }
                        }
                        if (me > maxAllowedFeaturesPerModel || ((rsquared - prevRsq) < (Small * rsquared)) || rsquared > MaxRSquared)
                        {
                            break;
                        }
                    }
                }
            }

            // Un-standardize: rescale lambdas and weights back to the original target and feature scales
            // and compute the intercept of each solution along the path.
            for (int k = 0; k < fit.NumberOfLambdas; k++)
            {
                fit.Lambdas[k] = targetStd * fit.Lambdas[k];
                int nk = fit.NumberOfWeights[k];
                for (int l = 0; l < nk; l++)
                {
                    fit.CompressedWeights[k][l] = targetStd * fit.CompressedWeights[k][l] / featureStds[fit.Indices[l]];
                    if (fit.CompressedWeights[k][l] != 0)
                    {
                        fit.NonZeroWeights[k]++;
                    }
                }
                double product = 0;
                for (int i = 0; i < nk; i++)
                {
                    product += fit.CompressedWeights[k][i] * featureMeans[fit.Indices[i]];
                }
                fit.Intercepts[k] = targetMean - product;
            }

            // First lambda was infinity; fixing it
            fit.Lambdas[0] = Math.Exp(2 * Math.Log(fit.Lambdas[1]) - Math.Log(fit.Lambdas[2]));

            stopWatch.Stop();
            ch.Info("Elapsed time for compression: {0}", stopWatch.Elapsed);

            return fit;
        }
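
The inner loops above update each coordinate with the lasso soft-thresholding operator S(u, lambda) = sign(u) * max(|u| - lambda, 0), where u is the feature's residual correlation plus its previous weight. The following is a minimal standalone sketch of that operator; the helper name SoftThreshold is chosen here for illustration and is not part of the original class:

        // Soft-thresholding: shrink u toward zero by lambda and clip at zero.
        // Equivalent to the in-loop expression: v = |u| - lambda; weight = (v > 0 ? sign(u) * v : 0).
        private static double SoftThreshold(double u, double lambda)
        {
            double v = Math.Abs(u) - lambda;
            return v > 0 ? Math.Sign(u) * v : 0.0;
        }

Each coordinate update in the loops is then equivalent to activeWeights[k] = SoftThreshold(feature2residualCorrelations[k] + activeWeights[k], curLambda).
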
Code Example #2
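GetGradient computes the gradient of the objective at the current training scores, combines it with the previous search direction using a beta coefficient clamped at zero, and returns the resulting direction, cloning it when the tree learner may modify the array.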
        protected override double[] GetGradient(IChannel ch)
        {
            Contracts.AssertValue(ch);
            _previousGradient = _currentGradient;

            _currentGradient = ObjectiveFunction.GetGradient(ch, TrainingScores.Scores);
            // We need to make a copy of the gradient because the returned reference points into a private
            // structure of ObjectiveFunctionBase that is only valid until the next GetGradient call.
            _currentGradient = (double[])_currentGradient.Clone();

            double[] previousDk = _currentDk;

            // First iteration
            if (_previousGradient == null)
            {
                _previousGradient = _currentGradient;
            }
#if !POLAK_RIBIERE_STEP
            // Compute Beta[k] = curG[k] * (curG[k] - prevG[k]) / (prevG[k] * prevG[k])
            // TODO: this can be optimized for speed. Keeping it slow but simple for now
            double beta = VectorUtils.GetDotProduct(_currentGradient, VectorUtils.Subtract(_currentGradient, _previousGradient)) / VectorUtils.GetDotProduct(_previousGradient, _previousGradient);
#else // Fletcher-Reeves step
            // Compute Beta[k] = (curG[k] * curG[k]) / (prevG[k] * prevG[k])
            double beta = VectorUtils.GetDotProduct(_currentGradient, _currentGradient) / VectorUtils.GetDotProduct(_previousGradient, _previousGradient);
#endif
            if (beta < 0)
            {
                beta = 0;
            }

            ch.Info("beta: {0}", beta);
            VectorUtils.MutiplyInPlace(previousDk, beta);
            VectorUtils.AddInPlace(previousDk, _currentGradient);
            _currentDk = previousDk; // Effectively a no-op: previousDk already refers to the same array as _currentDk

            // We know that LeastSquaresRegressionTreeLearner does not destroy gradients, so we can return the reference that we will need in the next iteration.
            if (TreeLearner is LeastSquaresRegressionTreeLearner)
            {
                return _currentDk;
            }
            // Assume that other tree learners may destroy the gradient array, so return a copy.
            else
            {
                return (double[])_currentDk.Clone();
            }
        }
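
The tail of GetGradient implements the conjugate-direction recurrence d_k = g_k + beta * d_(k-1), with beta clamped at zero. The following is a self-contained sketch of that update over plain arrays; the helper name NextConjugateDirection is chosen here for illustration and is not an ML.NET API:

        // beta = max(0, g_k . (g_k - g_(k-1)) / (g_(k-1) . g_(k-1)))
        // d_k  = g_k + beta * d_(k-1), written over dPrev in place
        private static double[] NextConjugateDirection(double[] gCur, double[] gPrev, double[] dPrev)
        {
            double num = 0.0, den = 0.0;
            for (int i = 0; i < gCur.Length; i++)
            {
                num += gCur[i] * (gCur[i] - gPrev[i]);
                den += gPrev[i] * gPrev[i];
            }
            double beta = den > 0.0 ? Math.Max(0.0, num / den) : 0.0;
            for (int i = 0; i < dPrev.Length; i++)
            {
                dPrev[i] = gCur[i] + beta * dPrev[i];
            }
            return dPrev;
        }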