/// <summary>
/// Fits a sequence of lasso models, one per lambda value, over the stored
/// observations (_observations) and targets (_targets), recording weights,
/// R-squared, lambda and intercept for each step in a <see cref="LassoFit"/>.
/// </summary>
/// <param name="ch">Channel used for informational log messages.</param>
/// <param name="maxAllowedFeaturesPerModel">
/// Upper bound on the number of features in a model; a negative value is
/// replaced by _numFeatures.
/// </param>
/// <remarks>
/// The method standardizes _observations and _targets in place.
/// NOTE(review): this listing appears to have lost its brace-only lines, its
/// block-comment delimiters and several short statements (bodies of some
/// "if" statements, the "do" keyword, the increment of numberOfInputs and the
/// final return). Presumably the method returns "fit" - confirm against the
/// original source before relying on the control flow shown here.
/// </remarks>
private LassoFit GetLassoFit(IChannel ch, int maxAllowedFeaturesPerModel)
            Stopwatch stopWatch = Stopwatch.StartNew();

            // A negative limit means "no limit": allow every feature.
            if (maxAllowedFeaturesPerModel < 0)
                maxAllowedFeaturesPerModel = _numFeatures;
            int numberOfLambdas             = DefaultNumberOFLambdas;
            // Allow 20% more features along the path than in the final model, capped at _numFeatures.
            int maxAllowedFeaturesAlongPath = (int)Math.Min(maxAllowedFeaturesPerModel * 1.2, _numFeatures);

            ch.Info("Lasso Compression uses {0} observations.", _numObservations);

            // lambdaMin = flmin * lambdaMax
            // A larger ratio is used when there are fewer observations than features.
            double flmin = (_numObservations < _numFeatures ? 5e-2 : 1e-4);

            // NOTE(review): the next three lines look like the interior of a block comment
            // whose opening and closing delimiters were lost.
             * Standardize predictors and target:
             * Center the target and features (mean 0) and normalize their vectors to have the same
             * standard deviation
            double[] featureMeans = new double[_numFeatures];
            double[] featureStds  = new double[_numFeatures];
            double[] feature2residualCorrelations = new double[_numFeatures];

            // Scaling by 1/sqrt(n) is applied to every centered feature and target value.
            float factor = (float)(1.0 / Math.Sqrt(_numObservations));

            // For each feature: subtract the mean, scale by factor, then divide by the
            // Euclidean norm of the resulting vector. Mean and norm are kept so the
            // weights can be mapped back to the original scale at the end.
            for (int j = 0; j < _numFeatures; j++)
                double mean = VectorUtils.GetMean(_observations[j]);
                featureMeans[j] = mean;
                    fixed(float *pVector = _observations[j])
                        for (int i = 0; i < _numObservations; i++)
                            pVector[i] = (float)(factor * (pVector[i] - mean));
                featureStds[j] = Math.Sqrt(VectorUtils.GetDotProduct(_observations[j], _observations[j]));

                VectorUtils.DivideInPlace(_observations[j], (float)featureStds[j]);

            // Same centering and normalization for the targets.
            float targetMean = (float)VectorUtils.GetMean(_targets);

                fixed(float *pVector = _targets)
                    for (int i = 0; i < _numObservations; i++)
                        pVector[i] = factor * (pVector[i] - targetMean);
            float targetStd = (float)Math.Sqrt(VectorUtils.GetDotProduct(_targets, _targets));

            VectorUtils.DivideInPlace(_targets, targetStd);

            // With all weights at zero, the feature/residual correlation is the
            // dot product of each feature with the target.
            for (int j = 0; j < _numFeatures; j++)
                feature2residualCorrelations[j] = VectorUtils.GetDotProduct(_targets, _observations[j]);

            // feature2featureCorrelations[j][c] caches the dot product of feature j with the
            // feature assigned to cache column c. correlationCacheIndices[k] holds the
            // 1-based cache column of feature k, with 0 meaning "not cached yet".
            double[][] feature2featureCorrelations = VectorUtils.AllocateDoubleMatrix(_numFeatures, maxAllowedFeaturesAlongPath);
            double[]   activeWeights           = new double[_numFeatures];
            int[]      correlationCacheIndices = new int[_numFeatures];
            double[]   denseActiveSet          = new double[_numFeatures];

            LassoFit fit = new LassoFit(numberOfLambdas, maxAllowedFeaturesAlongPath, _numFeatures);

            fit.NumberOfLambdas = 0;

            // Ratio between successive lambdas, chosen so that (numberOfLambdas - 1)
            // multiplications reduce lambda by a total factor of max(Epsilon, flmin).
            double alf      = Math.Pow(Math.Max(Epsilon, flmin), 1.0 / (numberOfLambdas - 1));
            double rsquared = 0.0;

            fit.NumberOfPasses = 0;
            // NOTE(review): numberOfInputs is read throughout but never incremented in the
            // visible lines; the increment was presumably lost - confirm.
            int numberOfInputs         = 0;
            int minimumNumberOfLambdas = Math.Min(MinNumberOFLambdas, numberOfLambdas);

            double curLambda = 0;
            double maxDelta;

            // One pass of this loop per lambda value on the path.
            for (int iteration = 1; iteration <= numberOfLambdas; iteration++)
                ch.Info("Starting iteration {0}: R2={1}", iteration, rsquared);

                // NOTE(review): the next line looks like block-comment text whose delimiters were lost.
                 * Compute lambda for this round
                // NOTE(review): the final assignment below (curLambda * alf) presumably sits in an
                // "else" branch for iteration > 2 that was lost - confirm.
                if (iteration == 1)
                    curLambda = Double.MaxValue; // first lambda is infinity
                else if (iteration == 2)
                    // Second lambda: alf times the largest absolute feature/residual correlation.
                    curLambda = 0.0;
                    for (int j = 0; j < _numFeatures; j++)
                        curLambda = Math.Max(curLambda, Math.Abs(feature2residualCorrelations[j]));
                    curLambda = alf * curLambda;
                    curLambda = curLambda * alf;

                double prevRsq = rsquared;
                double v;
                    fixed(double *pActiveWeights = activeWeights)
                    fixed(double *pFeature2residualCorrelations = feature2residualCorrelations)
                    fixed(int *pIndices = fit.Indices)
                    fixed(int *pCorrelationCacheIndices = correlationCacheIndices)
                        // Outer loop: a full sweep over all features, followed by repeated
                        // sweeps over the cached (active) features only.
                        while (true)
                            maxDelta = 0.0;
                            for (int k = 0; k < _numFeatures; k++)
                                double prevWeight = pActiveWeights[k];
                                double u          = pFeature2residualCorrelations[k] + prevWeight;
                                v = (u >= 0 ? u : -u) - curLambda;
                                // Computes sign(u)(|u| - curLambda)+
                                pActiveWeights[k] = (v > 0 ? (u >= 0 ? v : -v) : 0.0);

                                // Is the weight of this variable changed?
                                // If not, we go to the next one
                                // NOTE(review): the body of this "if" (presumably a continue) is missing.
                                if (pActiveWeights[k] == prevWeight)

                                // If we have not computed the correlations of this
                                // variable with other variables, we do this now and
                                // cache the result
                                if (pCorrelationCacheIndices[k] == 0)
                                    // NOTE(review): the body of this "if" (presumably a break) is missing.
                                    if (numberOfInputs > maxAllowedFeaturesAlongPath)
                                        // we have reached the maximum
                                    for (int j = 0; j < _numFeatures; j++)
                                        // if we have already computed correlations for
                                        // the jth variable, we will reuse it here.
                                        // NOTE(review): "else" keywords appear to be missing between the
                                        // three assignments below - confirm.
                                        if (pCorrelationCacheIndices[j] != 0)
                                            feature2featureCorrelations[j][numberOfInputs - 1] = feature2featureCorrelations[k][pCorrelationCacheIndices[j] - 1];
                                            // The correlation of a variable with itself is one
                                            if (j == k)
                                                feature2featureCorrelations[j][numberOfInputs - 1] = 1.0;
                                                feature2featureCorrelations[j][numberOfInputs - 1] = VectorUtils.GetDotProduct(_observations[j], _observations[k]);
                                    // Register feature k as the owner of cache column numberOfInputs (1-based).
                                    pCorrelationCacheIndices[k]  = numberOfInputs;
                                    pIndices[numberOfInputs - 1] = k;

                                // How much is the weight changed?
                                double delta = pActiveWeights[k] - prevWeight;
                                rsquared += delta * (2.0 * pFeature2residualCorrelations[k] - delta);
                                maxDelta  = Math.Max((delta >= 0 ? delta : -delta), maxDelta);

                                // Update every feature's correlation with the residual for the weight change.
                                for (int j = 0; j < _numFeatures; j++)
                                    pFeature2residualCorrelations[j] -= feature2featureCorrelations[j][pCorrelationCacheIndices[k] - 1] * delta;

                            // NOTE(review): the body of this "if" (presumably a break out of the while loop) is missing.
                            if (maxDelta < ConvergenceThreshold || numberOfInputs > maxAllowedFeaturesAlongPath)

                            // Snapshot the active weights so their total change can be computed after the inner loop.
                            for (int ii = 0; ii < numberOfInputs; ii++)
                                denseActiveSet[ii] = activeWeights[pIndices[ii]];

                            // Inner loop: iterate over the cached features only until the largest
                            // weight change drops below ConvergenceThreshold.
                            // NOTE(review): the "do" matching the "while" further down is missing.
                                maxDelta = 0.0;
                                for (int l = 0; l < numberOfInputs; l++)
                                    int    k          = pIndices[l];
                                    double prevWeight = pActiveWeights[k];
                                    double u          = pFeature2residualCorrelations[k] + prevWeight;
                                    v = (u >= 0 ? u : -u) - curLambda;
                                    pActiveWeights[k] = (v > 0 ? (u >= 0 ? v : -v) : 0.0);
                                    // NOTE(review): the body of this "if" (presumably a continue) is missing.
                                    if (activeWeights[k] == prevWeight)
                                    double delta = pActiveWeights[k] - prevWeight;
                                    rsquared += delta * (2.0 * pFeature2residualCorrelations[k] - delta);
                                    maxDelta  = Math.Max((delta >= 0 ? delta : -delta), maxDelta);
                                    // Only the residual correlations of cached features are updated here.
                                    for (int j = 0; j < numberOfInputs; j++)
                                        pFeature2residualCorrelations[pIndices[j]] -= feature2featureCorrelations[pIndices[j]][pCorrelationCacheIndices[k] - 1] * delta;
                            } while (maxDelta >= ConvergenceThreshold);

                            // Turn the snapshot into the net weight change made by the inner loop, then
                            // apply that change to the residual correlations of the non-cached features.
                            for (int ii = 0; ii < numberOfInputs; ii++)
                                denseActiveSet[ii] = pActiveWeights[pIndices[ii]] - denseActiveSet[ii];
                            for (int j = 0; j < _numFeatures; j++)
                                if (pCorrelationCacheIndices[j] == 0)
                                    pFeature2residualCorrelations[j] -= VectorUtils.GetDotProduct(denseActiveSet, feature2featureCorrelations[j], numberOfInputs);

                        // NOTE(review): the body of this "if" (presumably a break out of the lambda loop) is missing.
                        if (numberOfInputs > maxAllowedFeaturesAlongPath)
                        // Record the solution for this lambda.
                        if (numberOfInputs > 0)
                            for (int ii = 0; ii < numberOfInputs; ii++)
                                fit.CompressedWeights[iteration - 1][ii] = pActiveWeights[pIndices[ii]];
                        fit.NumberOfWeights[iteration - 1] = numberOfInputs;
                        fit.Rsquared[iteration - 1]        = rsquared;
                        fit.Lambdas[iteration - 1]         = curLambda;
                        fit.NumberOfLambdas = iteration;

                        // NOTE(review): the body of this "if" (presumably a continue) is missing.
                        if (iteration < minimumNumberOfLambdas)

                        int me = 0;

                        // NOTE(review): the body of the "if" below (presumably incrementing me, a count of
                        // non-zero weights) and the body of the stopping test after it are missing.
                        for (int j = 0; j < numberOfInputs; j++)
                            if (fit.CompressedWeights[iteration - 1][j] != 0.0)
                        if (me > maxAllowedFeaturesPerModel || ((rsquared - prevRsq) < (Small * rsquared)) || rsquared > MaxRSquared)

            // Map lambdas and weights back to the original (unstandardized) scale and
            // compute the intercept of each model.
            for (int k = 0; k < fit.NumberOfLambdas; k++)
                fit.Lambdas[k] = targetStd * fit.Lambdas[k];
                int nk = fit.NumberOfWeights[k];
                for (int l = 0; l < nk; l++)
                    fit.CompressedWeights[k][l] = targetStd * fit.CompressedWeights[k][l] / featureStds[fit.Indices[l]];
                    // NOTE(review): the body of this "if" is missing.
                    if (fit.CompressedWeights[k][l] != 0)
                // Intercept = target mean minus the weighted sum of the feature means.
                double product = 0;
                for (int i = 0; i < nk; i++)
                    product += fit.CompressedWeights[k][i] * featureMeans[fit.Indices[i]];
                fit.Intercepts[k] = targetMean - product;

            // First lambda was infinity; fixing it
            // Equivalent to Lambdas[1]^2 / Lambdas[2], i.e. extending the geometric sequence backwards.
            // NOTE(review): reads Lambdas[1] and Lambdas[2] unconditionally - confirm the path always
            // records at least three lambdas.
            fit.Lambdas[0] = Math.Exp(2 * Math.Log(fit.Lambdas[1]) - Math.Log(fit.Lambdas[2]));

            ch.Info("Elapsed time for compression: {0}", stopWatch.Elapsed);

// Example no. 2
        /// <summary>
        /// Fetches the current gradient from ObjectiveFunction, combines it with the
        /// previous search direction scaled by beta, and stores the result in _currentDk.
        /// </summary>
        /// <param name="ch">Channel passed to the objective function and used to log beta.</param>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its brace-only lines, the
        /// "#if"/"#endif" lines around the two beta computations and the return
        /// statements - confirm against the original source.
        /// </remarks>
        protected override double[] GetGradient(IChannel ch)
            _previousGradient = _currentGradient;

            _currentGradient = ObjectiveFunction.GetGradient(ch, TrainingScores.Scores);
            // We need to copy the gradient because the returned reference is a private structure of ObjectiveFunctionBase and is valid only until the next GetGradient call
            _currentGradient = (double[])_currentGradient.Clone();

            double[] previousDk = _currentDk;

            // First iteration: there is no previous gradient yet, so use the current one
            if (_previousGradient == null)
                _previousGradient = _currentGradient;
            // Compute Beta[k] = curG[k] * (curG[k] - prevG[k]) / (prevG[k] * prevG[k])
            // TODO: this can be optimized for speed. Keeping it slow but simple for now
            double beta = VectorUtils.GetDotProduct(_currentGradient, VectorUtils.Subtract(_currentGradient, _previousGradient)) / VectorUtils.GetDotProduct(_previousGradient, _previousGradient);
#else //Fletcher Reeves step
            // Compute Beta[k] = (curG[k] * curG[k]) / (prevG[k] * prevG[k])
            // NOTE(review): this branch uses currentGradient/previousGradient without the leading
            // underscore used elsewhere in this method - confirm it compiles when enabled.
            double beta = VectorUtils.GetDotProduct(currentGradient, currentGradient) / VectorUtils.GetDotProduct(previousGradient, previousGradient);
            // Negative beta is clamped to zero.
            if (beta < 0)
                beta = 0;

            ch.Info("beta: {0}", beta);
            // New direction = beta * previous direction + current gradient, computed in place.
            VectorUtils.MutiplyInPlace(previousDk, beta);
            VectorUtils.AddInPlace(previousDk, _currentGradient);
            _currentDk = previousDk; // Really a no-op operation

            // We know that LeastSquaresRegressionTreeLearner does not destroy gradients so we can return our reference that we will need in next iter.
            if (TreeLearner is LeastSquaresRegressionTreeLearner)
            // Assume that other tree learners destroy the gradient array, so return a copy.