Example #1
            protected internal OptimizerState(IChannel ch, IProgressChannelProvider progress, ref VBuffer<Float> initial,
                                              int m, long totalMemLimit, bool keepDense, bool enforceNonNegativity)
            {
                Contracts.AssertValue(ch);
                Ch = ch;
                ch.AssertValueOrNull(progress);
                ProgressProvider = progress;
                Iter             = 1;

                _keepDense = keepDense;
                Dim        = initial.Length;

                _x = CreateWorkingVector();
                initial.CopyTo(ref _x);
                _m             = m;
                _totalMemLimit = totalMemLimit;

                _grad            = CreateWorkingVector();
                _dir             = CreateWorkingVector();
                _newX            = CreateWorkingVector();
                _newGrad         = CreateWorkingVector();
                _steepestDescDir = CreateWorkingVector();

                _sList  = new VBuffer<Float>[_m];
                _yList  = new VBuffer<Float>[_m];
                _roList = new List<Float>();

                EnforceNonNegativity = enforceNonNegativity;
            }
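
The arrays set up above are the L-BFGS history: _sList holds the last m position differences s_k = x_{k+1} - x_k, _yList the matching gradient differences y_k, and _roList the scalars ro_k = 1 / (y_k · s_k). As a minimal standalone sketch (not the library's code; plain arrays and hypothetical names stand in for VBuffer), the two-loop recursion this history feeds looks like:

    // Illustrative sketch of the L-BFGS two-loop recursion; all names are hypothetical.
    internal static class LbfgsSketch
    {
        // Returns q ~= H * grad, where H is the inverse-Hessian approximation built from
        // the stored (s, y, ro) history; the search direction is then -q.
        public static float[] TwoLoopDirection(float[] grad, float[][] s, float[][] y, float[] ro)
        {
            int m = s.Length, n = grad.Length;   // assumes at least one (s, y) pair is stored
            var q = (float[])grad.Clone();
            var alpha = new float[m];
            for (int i = m - 1; i >= 0; i--)     // first loop: newest to oldest
            {
                alpha[i] = ro[i] * Dot(s[i], q);
                for (int j = 0; j < n; j++)
                    q[j] -= alpha[i] * y[i][j];
            }
            float gamma = Dot(s[m - 1], y[m - 1]) / Dot(y[m - 1], y[m - 1]);
            for (int j = 0; j < n; j++)          // scale by the initial Hessian guess gamma * I
                q[j] *= gamma;
            for (int i = 0; i < m; i++)          // second loop: oldest to newest
            {
                float beta = ro[i] * Dot(y[i], q);
                for (int j = 0; j < n; j++)
                    q[j] += s[i][j] * (alpha[i] - beta);
            }
            return q;
        }

        private static float Dot(float[] a, float[] b)
        {
            float r = 0;
            for (int i = 0; i < a.Length; i++)
                r += a[i] * b[i];
            return r;
        }
    }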
Example #2
 internal FunctionOptimizerState(IChannel ch, IProgressChannelProvider progress, DifferentiableFunction function, ref VBuffer<Float> initial, int m,
                                 long totalMemLimit, bool keepDense, bool enforceNonNegativity)
     : base(ch, progress, ref initial, m, totalMemLimit, keepDense, enforceNonNegativity)
 {
     Function = function;
     Init();
 }
Example #3
        /// <summary>
        /// Evaluates the function at <paramref name="x"/> and fills in its gradient; this is the
        /// differentiable function consumed by the optimizer.
        /// </summary>
        protected virtual float DifferentiableFunction(ref VBuffer<float> x, ref VBuffer<float> gradient,
                                                       IProgressChannelProvider progress)
        {
            Contracts.Assert((_numChunks == 0) != (_data == null));
            Contracts.Assert((_cursorFactory == null) == (_data == null));
            Contracts.Assert(x.Length == BiasCount + WeightCount);
            Contracts.Assert(gradient.Length == BiasCount + WeightCount);
            // REVIEW: if/when LBFGS test code is removed, the progress provider needs to become required.
            Contracts.AssertValueOrNull(progress);

            float           scaleFactor = 1 / (float)WeightSum;
            VBuffer<float>  xDense      = default(VBuffer<float>);

            if (x.IsDense)
            {
                xDense = x;
            }
            else
            {
                x.CopyToDense(ref xDense);
            }

            IProgressChannel pch = progress != null ? progress.StartProgressChannel("Gradient") : null;

            float loss;

            using (pch)
            {
                loss = _data == null
                    ? DifferentiableFunctionMultithreaded(ref xDense, ref gradient, pch)
                    : DifferentiableFunctionStream(_cursorFactory, ref xDense, ref gradient, pch);
            }
            float regLoss = 0;

            if (L2Weight > 0)
            {
                Contracts.Assert(xDense.IsDense);
                var    values = xDense.Values;
                Double r      = 0;
                for (int i = BiasCount; i < values.Length; i++)
                {
                    var xx = values[i];
                    r += xx * xx;
                }
                regLoss = (float)(r * L2Weight * 0.5);

                // Here we probably want to use sparse x
                VBufferUtils.ApplyWithEitherDefined(ref x, ref gradient,
                    (int ind, float v1, ref float v2) =>
                    {
                        if (ind >= BiasCount)
                            v2 += L2Weight * v1;
                    });
            }
            VectorUtils.ScaleBy(ref gradient, scaleFactor);

            // REVIEW: The regularization component of the loss is being scaled as well,
            // but it's unclear that it should be scaled.
            return (loss + regLoss) * scaleFactor;
        }
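
The L2 block in this method adds 0.5 * L2Weight * ||w||^2 to the loss and L2Weight * w to the gradient, leaving the first BiasCount entries unregularized. A condensed sketch of just that update over dense arrays (a hypothetical helper, not the library's API):

    // Adds the L2 penalty gradient in place and returns the penalty's loss contribution;
    // entries below biasCount are skipped, matching the snippet above.
    static float AddL2(float[] x, float[] gradient, int biasCount, float l2Weight)
    {
        double r = 0;
        for (int i = biasCount; i < x.Length; i++)
        {
            r += (double)x[i] * x[i];
            gradient[i] += l2Weight * x[i];
        }
        return (float)(r * l2Weight * 0.5);
    }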
Example #4
            internal L1OptimizerState(IChannel ch, IProgressChannelProvider progress, DifferentiableFunction function, ref VBuffer<Float> initial, int m, long totalMemLimit,
                                      int biasCount, Float l1Weight, bool keepDense, bool enforceNonNegativity)
                : base(ch, progress, ref initial, m, totalMemLimit, keepDense, enforceNonNegativity)
            {
                Contracts.AssertValue(ch);
                ch.Assert(0 <= biasCount && biasCount < initial.Length);
                ch.Assert(l1Weight > 0);

                _biasCount = biasCount;
                _l1weight  = l1Weight;
                _function  = function;

                Init();
            }
Example #5
        internal override OptimizerState MakeState(IChannel ch, IProgressChannelProvider progress, DifferentiableFunction function, ref VBuffer<Float> initial)
        {
            Contracts.AssertValue(ch);
            ch.AssertValue(progress);

            if (EnforceNonNegativity)
            {
                VBufferUtils.Apply(ref initial, delegate(int ind, ref Float initialVal)
                {
                    if (initialVal < 0.0 && ind >= _biasCount)
                    {
                        initialVal = 0;
                    }
                });
            }

            if (_l1weight > 0 && _biasCount < initial.Length)
            {
                return new L1OptimizerState(ch, progress, function, in initial, M, TotalMemoryLimit, _biasCount, _l1weight, KeepDense, EnforceNonNegativity);
            }
            return new FunctionOptimizerState(ch, progress, function, in initial, M, TotalMemoryLimit, KeepDense, EnforceNonNegativity);
        }
Example #6
        /// <inheritdoc/>
        private protected override void TrainWithoutLock(IProgressChannelProvider progress, FloatLabelCursor.Factory cursorFactory, Random rand,
                                                         IdToIdxLookup idToIdx, int numThreads, DualsTableBase duals, float[] biasReg, float[] invariants, float lambdaNInv,
                                                         VBuffer<float>[] weights, float[] biasUnreg, VBuffer<float>[] l1IntermediateWeights, float[] l1IntermediateBias, float[] featureNormSquared)
        {
            Contracts.AssertValueOrNull(progress);
            Contracts.Assert(SdcaTrainerOptions.L1Threshold.HasValue);
            Contracts.AssertValueOrNull(idToIdx);
            Contracts.AssertValueOrNull(invariants);
            Contracts.AssertValueOrNull(featureNormSquared);
            int numClasses = Utils.Size(weights);

            Contracts.Assert(Utils.Size(biasReg) == numClasses);
            Contracts.Assert(Utils.Size(biasUnreg) == numClasses);

            int  maxUpdateTrials = 2 * numThreads;
            var  l1Threshold     = SdcaTrainerOptions.L1Threshold.Value;
            bool l1ThresholdZero = l1Threshold == 0;
            var  lr = SdcaTrainerOptions.BiasLearningRate * SdcaTrainerOptions.L2Regularization.Value;

            var pch = progress != null ? progress.StartProgressChannel("Dual update") : null;

            using (pch)
                using (var cursor = SdcaTrainerOptions.Shuffle ? cursorFactory.Create(rand) : cursorFactory.Create())
                {
                    long rowCount = 0;
                    if (pch != null)
                    {
                        pch.SetHeader(new ProgressHeader("examples"), e => e.SetProgress(0, rowCount));
                    }

                    Func<DataViewRowId, long> getIndexFromId = GetIndexFromIdGetter(idToIdx, biasReg.Length);
                    while (cursor.MoveNext())
                    {
                        long  idx = getIndexFromId(cursor.Id);
                        long  dualIndexInitPos = idx * numClasses;
                        var   features         = cursor.Features;
                        var   label            = (int)cursor.Label;
                        float invariant;
                        float normSquared;
                        if (invariants != null)
                        {
                            invariant = invariants[idx];
                            Contracts.AssertValue(featureNormSquared);
                            normSquared = featureNormSquared[idx];
                        }
                        else
                        {
                            normSquared = VectorUtils.NormSquared(in features);
                            if (SdcaTrainerOptions.BiasLearningRate == 0)
                            {
                                normSquared += 1;
                            }

                            invariant = _loss.ComputeDualUpdateInvariant(2 * normSquared * lambdaNInv * GetInstanceWeight(cursor));
                        }

                        // The output for the label class using current weights and bias.
                        var labelOutput    = WDot(in features, in weights[label], biasReg[label] + biasUnreg[label]);
                        var instanceWeight = GetInstanceWeight(cursor);

                        // This will be the new dual variable corresponding to the label class.
                        float labelDual = 0;

                        // This will be used to update the weights and regularized bias corresponding to the label class.
                        float labelPrimalUpdate = 0;

                        // This will be used to update the unregularized bias corresponding to the label class.
                        float labelAdjustment = 0;

                        // Iterates through all classes.
                        for (int iClass = 0; iClass < numClasses; iClass++)
                        {
                            // Skip the dual/weights/bias update for label class. Will be taken care of at the end.
                            if (iClass == label)
                            {
                                continue;
                            }

                            var weightsEditor = VBufferEditor.CreateFromBuffer(ref weights[iClass]);
                            var l1IntermediateWeightsEditor = !l1ThresholdZero
                                ? VBufferEditor.CreateFromBuffer(ref l1IntermediateWeights[iClass])
                                : default;

                            // Loop trials for compare-and-swap updates of duals.
                            // In general, concurrent updates conflicting on the same dual variable are rare
                            // if the data is shuffled.
                            for (int numTrials = 0; numTrials < maxUpdateTrials; numTrials++)
                            {
                                long dualIndex  = iClass + dualIndexInitPos;
                                var  dual       = duals[dualIndex];
                                var  output     = labelOutput + labelPrimalUpdate * normSquared - WDot(in features, in weights[iClass], biasReg[iClass] + biasUnreg[iClass]);
                                var  dualUpdate = _loss.DualUpdate(output, 1, dual, invariant, numThreads);

                                // The successive over-relaxation approach to adjust the sum of dual variables (biasReg) to zero.
                                // Reference to details: http://stat.rutgers.edu/home/tzhang/papers/ml02_dual.pdf, pp. 16-17.
                                var adjustment = l1ThresholdZero ? lr * biasReg[iClass] : lr * l1IntermediateBias[iClass];
                                dualUpdate -= adjustment;
                                bool success = false;
                                duals.ApplyAt(dualIndex, (long index, ref float value) =>
                                              success = Interlocked.CompareExchange(ref value, dual + dualUpdate, dual) == dual);

                                if (success)
                                {
                                    // Note: dualConstraint[iClass] = lambdaNInv * (sum of duals[iClass])
                                    var primalUpdate = dualUpdate * lambdaNInv * instanceWeight;
                                    labelDual         -= dual + dualUpdate;
                                    labelPrimalUpdate += primalUpdate;
                                    biasUnreg[iClass] += adjustment * lambdaNInv * instanceWeight;
                                    labelAdjustment   -= adjustment;

                                    if (l1ThresholdZero)
                                    {
                                        VectorUtils.AddMult(in features, weightsEditor.Values, -primalUpdate);
                                        biasReg[iClass] -= primalUpdate;
                                    }
                                    else
                                    {
                                        // Iterative shrinkage-thresholding (aka soft-thresholding):
                                        // update v = denseWeights as if there were no L1 penalty, then threshold:
                                        // if |v[j]| < threshold, turn off weights[j];
                                        // otherwise shrink: w[j] = v[j] - sign(v[j]) * threshold.
                                        l1IntermediateBias[iClass] -= primalUpdate;
                                        if (SdcaTrainerOptions.BiasLearningRate == 0)
                                        {
                                            biasReg[iClass] = Math.Abs(l1IntermediateBias[iClass]) - l1Threshold > 0.0
                                                ? l1IntermediateBias[iClass] - Math.Sign(l1IntermediateBias[iClass]) * l1Threshold
                                                : 0;
                                        }

                                        var featureValues = features.GetValues();
                                        if (features.IsDense)
                                        {
                                            CpuMathUtils.SdcaL1UpdateDense(-primalUpdate, featureValues.Length, featureValues, l1Threshold, l1IntermediateWeightsEditor.Values, weightsEditor.Values);
                                        }
                                        else if (featureValues.Length > 0)
                                        {
                                            CpuMathUtils.SdcaL1UpdateSparse(-primalUpdate, featureValues.Length, featureValues, features.GetIndices(), l1Threshold, l1IntermediateWeightsEditor.Values, weightsEditor.Values);
                                        }
                                    }

                                    break;
                                }
                            }
                        }

                        // Updating with label class weights and dual variable.
                        duals[label + dualIndexInitPos] = labelDual;
                        biasUnreg[label] += labelAdjustment * lambdaNInv * instanceWeight;
                        if (l1ThresholdZero)
                        {
                            var weightsEditor = VBufferEditor.CreateFromBuffer(ref weights[label]);
                            VectorUtils.AddMult(in features, weightsEditor.Values, labelPrimalUpdate);
                            biasReg[label] += labelPrimalUpdate;
                        }
                        else
                        {
                            l1IntermediateBias[label] += labelPrimalUpdate;
                            var intermediateBias = l1IntermediateBias[label];
                            biasReg[label] = Math.Abs(intermediateBias) - l1Threshold > 0.0
                                ? intermediateBias - Math.Sign(intermediateBias) * l1Threshold
                                : 0;

                            var weightsEditor = VBufferEditor.CreateFromBuffer(ref weights[label]);
                            var l1IntermediateWeightsEditor = VBufferEditor.CreateFromBuffer(ref l1IntermediateWeights[label]);
                            var featureValues = features.GetValues();
                            if (features.IsDense)
                            {
                                CpuMathUtils.SdcaL1UpdateDense(labelPrimalUpdate, featureValues.Length, featureValues, l1Threshold, l1IntermediateWeightsEditor.Values, weightsEditor.Values);
                            }
                            else if (featureValues.Length > 0)
                            {
                                CpuMathUtils.SdcaL1UpdateSparse(labelPrimalUpdate, featureValues.Length, featureValues, features.GetIndices(), l1Threshold, l1IntermediateWeightsEditor.Values, weightsEditor.Values);
                            }
                        }

                        rowCount++;
                    }
                }
        }
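
The shrinkage branch above is plain soft-thresholding, applied to the bias directly and to the weights through CpuMathUtils.SdcaL1UpdateDense/SdcaL1UpdateSparse. The scalar rule both paths implement, as a minimal sketch:

    // Soft-thresholding: a value inside the L1 threshold is zeroed out;
    // a larger one is shrunk toward zero by the threshold.
    static float SoftThreshold(float v, float threshold)
        => Math.Abs(v) > threshold ? v - Math.Sign(v) * threshold : 0f;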
Example #7
 // Make sure _lossNormalizer is added only once
 protected override float DifferentiableFunction(ref VBuffer<float> x, ref VBuffer<float> gradient, IProgressChannelProvider progress)
 {
     return base.DifferentiableFunction(ref x, ref gradient, progress) + (float)(_lossNormalizer / NumGoodRows);
 }
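
Since _lossNormalizer / NumGoodRows does not depend on x, this override only shifts the reported loss by a constant; the gradient filled in by the base computation is unchanged.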
Example #9
 internal virtual OptimizerState MakeState(IChannel ch, IProgressChannelProvider progress, DifferentiableFunction function, ref VBuffer<float> initial)
 {
     return new FunctionOptimizerState(ch, progress, function, in initial, M, TotalMemoryLimit, KeepDense, EnforceNonNegativity);
 }
Example #10
 internal FunctionOptimizerState(IChannel ch, IProgressChannelProvider progress, DifferentiableFunction function, in VBuffer<float> initial, int m,
                                 long totalMemLimit, bool keepDense, bool enforceNonNegativity)
     : base(ch, progress, in initial, m, totalMemLimit, keepDense, enforceNonNegativity)
 {
     Function = function;
     Init();
 }
Example #11
        private static Float QuadTest2D(ref VBuffer<Float> x, ref VBuffer<Float> grad, IProgressChannelProvider progress = null)
        {
            Float d1 = VectorUtils.DotProduct(ref x, ref _c1);
            Float d2 = VectorUtils.DotProduct(ref x, ref _c2);
            Float d3 = VectorUtils.DotProduct(ref x, ref _c3);

            _c3.CopyTo(ref grad);
            VectorUtils.AddMult(ref _c1, d1, ref grad);
            VectorUtils.AddMult(ref _c2, d2, ref grad);
            return (Float)0.5 * (d1 * d1 + d2 * d2) + d3 + 55;
        }
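
Here f(x) = 0.5 * (d1^2 + d2^2) + d3 + 55, with each di being the dot product of _ci and x, so the gradient is d1 * _c1 + d2 * _c2 + _c3, which is exactly what the body accumulates into grad by copying _c3 and adding the scaled _c1 and _c2.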
Example #12
            /// <summary>
            /// This is the original differentiable function with the injected L1 term.
            /// </summary>
            private Float EvalCore(ref VBuffer<Float> input, ref VBuffer<Float> gradient, IProgressChannelProvider progress)
            {
                // REVIEW: Leverage Vector methods that use SSE.
                Float res = 0;

                if (!EnforceNonNegativity)
                {
                    if (_biasCount > 0)
                    {
                        VBufferUtils.ForEachDefined(ref input,
                            (ind, value) =>
                            {
                                if (ind >= _biasCount)
                                    res += Math.Abs(value);
                            });
                    }
                    else
                    {
                        VBufferUtils.ForEachDefined(ref input, (ind, value) => res += Math.Abs(value));
                    }
                }
                else
                {
                    if (_biasCount > 0)
                    {
                        VBufferUtils.ForEachDefined(ref input,
                            (ind, value) =>
                            {
                                if (ind >= _biasCount)
                                    res += value;
                            });
                    }
                    else
                    {
                        VBufferUtils.ForEachDefined(ref input, (ind, value) => res += value);
                    }
                }
                res = _l1weight * res + _function(ref input, ref gradient, progress);
                return res;
            }
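
EvalCore injects the penalty _l1weight * sum of |w_j| over the non-bias entries; under the non-negativity constraint the absolute value is dropped because every such weight is already at or above zero. The same computation over a dense array, as a standalone sketch with hypothetical names:

    // L1 penalty over the non-bias entries; with nonNegative the Math.Abs is redundant
    // because the optimizer keeps those weights nonnegative.
    static float L1Penalty(float[] w, int biasCount, float l1Weight, bool nonNegative)
    {
        float res = 0;
        for (int i = biasCount; i < w.Length; i++)
            res += nonNegative ? w[i] : Math.Abs(w[i]);
        return l1Weight * res;
    }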