Ejemplo n.º 1
        /// <summary>
        /// Runs one trial pass of stochastic updates with a fixed learning rate and reports the
        /// objective reached on the whole sample.
        /// </summary>
        /// <remarks>
        /// The trial works on a private copy of <paramref name="initial"/>, so the caller's array is
        /// not modified by this method itself. The sample is consumed in
        /// <c>sample.Length / bSize</c> consecutive batches of <c>bSize</c> indices; any remainder
        /// that does not fill a whole batch is not used for updates (it still counts in the final
        /// objective). After every batch the scale factor is multiplied by
        /// <c>1 - eta * lambda * bSize</c>.
        /// </remarks>
        /// <param name="function">Function that performs the stochastic update and is passed on to <c>GetObjective</c>.</param>
        /// <param name="initial">Starting weights; copied before any update is applied.</param>
        /// <param name="sample">Sample indices to run the trial on.</param>
        /// <param name="eta">Learning rate to try.</param>
        /// <returns>The value of <c>GetObjective</c> for the updated weights and scale on <paramref name="sample"/>.</returns>
        public virtual double TryEta(AbstractStochasticCachingDiffUpdateFunction function, double[] initial, int[] sample, double eta)
        {
            int numBatches = sample.Length / bSize;

            // Trial weights: a copy of the starting point plus a separate scalar scale.
            double[] w      = new double[initial.Length];
            double   wscale = 1;

            System.Array.Copy(initial, 0, w, 0, w.Length);

            int[] sampleBatch = new int[bSize];
            int   sampleIndex = 0;

            for (int batch = 0; batch < numBatches; batch++)
            {
                // Fill the batch buffer with the next bSize sample indices.
                for (int i = 0; i < bSize; i++)
                {
                    sampleBatch[i] = sample[(sampleIndex + i) % sample.Length];
                }
                sampleIndex += bSize;

                // The update receives the rate divided by the current scale.
                double gain = eta / wscale;
                function.CalculateStochasticUpdate(w, wscale, sampleBatch, gain);

                // Shrink the scale after each batch.
                wscale *= (1 - eta * lambda * bSize);
            }

            return GetObjective(function, w, wscale, sample);
        }

Ejemplo n.º 2
        /// <summary>
        /// Evaluates the objective on a sample and adds an L2 regularization term.
        /// </summary>
        /// <remarks>
        /// The result is
        /// <c>function.ValueAt(w, wscale, sample) + 0.5 * sample.Length * lambda * GetNorm(w) * wscale * wscale</c>.
        /// <c>GetNorm</c> is defined elsewhere; multiplying it by <c>wscale</c> twice suggests it
        /// returns a squared norm - confirm against its definition.
        /// </remarks>
        /// <param name="function">Function whose <c>ValueAt</c> supplies the unregularized value.</param>
        /// <param name="w">Weight vector.</param>
        /// <param name="wscale">Scalar scale passed to <c>ValueAt</c> together with <paramref name="w"/>.</param>
        /// <param name="sample">Sample indices to evaluate on; its length weights the regularization term.</param>
        /// <returns>The regularized objective value.</returns>
        public virtual double GetObjective(AbstractStochasticCachingDiffUpdateFunction function, double[] w, double wscale, int[] sample)
        {
            double wnorm = GetNorm(w) * wscale * wscale;
            double obj   = function.ValueAt(w, wscale, sample);

            // Calculate objective with L2 regularization
            return obj + 0.5 * sample.Length * lambda * wnorm;
        }
Ejemplo n.º 3
        /// <summary>Searches for a good initial learning rate and derives <c>t0</c> from it.</summary>
        /// <remarks>
        /// The learning-rate schedule is eta = 1/(lambda*(t0+t)); this method looks for a good t0.
        /// Candidate rates are scored with <c>TryEta</c> on a sample obtained from
        /// <c>function.GetSample(sampleSize)</c>. A candidate is acceptable ("okay") when its
        /// objective is below the objective of <paramref name="initial"/> on the same sample.
        /// The chosen rate is divided by 2 before <c>t0</c> is set to <c>(int)(1 / (besteta * lambda))</c>.
        /// NOTE(review): this listing has no braces, no <c>else</c> keyword and no <c>return</c>
        /// statement, so the nesting described in the inline comments is inferred from indentation
        /// only - confirm against the original file.
        /// </remarks>
        /// <param name="function">Function that supplies the sample and is passed on to <c>GetObjective</c> and <c>TryEta</c>.</param>
        /// <param name="initial">Starting weights; used for the baseline objective and as the start point of every trial.</param>
        /// <param name="sampleSize">Number of samples requested from <c>function.GetSample</c>.</param>
        /// <param name="seta">First learning rate to try.</param>
        public virtual double Tune(AbstractStochasticCachingDiffUpdateFunction function, double[] initial, int sampleSize, double seta)
            Timing timer = new Timing();

            int[]  sample  = function.GetSample(sampleSize);
            // Baseline: objective of the untouched starting point (scale 1) on the sample.
            double sobj    = GetObjective(function, initial, 1, sample);
            double besteta = 1;
            double bestobj = sobj;
            double eta     = seta;
            int    totest  = 10;
            double factor  = 2;
            bool   phase2  = false;

            // Keep trying until phase 2 has been entered and totest has been counted down to zero.
            while (totest > 0 || !phase2)
                double obj  = TryEta(function, initial, sample, eta);
                bool   okay = (obj < sobj);
                Sayln("  Trying eta=" + eta + "  obj=" + obj + ((okay) ? "(possible)" : "(too large)"));
                // An acceptable candidate uses up one of the remaining tests and may become the best so far.
                if (okay)
                    totest -= 1;
                    if (obj < bestobj)
                        bestobj = obj;
                        besteta = eta;
                // Phase 1: presumably grows eta by 'factor' while candidates are okay and, on the first
                // failure, switches to phase 2 and restarts from seta.
                // NOTE(review): an 'else' between the next two groups of statements appears to be missing.
                if (!phase2)
                    if (okay)
                        eta = eta * factor;
                        phase2 = true;
                        eta    = seta;
                // Phase 2: shrink eta by 'factor' for the next trial.
                if (phase2)
                    eta = eta / factor;
            // take it on the safe side (implicit regularization)
            besteta /= factor;
            // determine t
            t0 = (int)(1 / (besteta * lambda));
            Sayln("  Taking eta=" + besteta + " t0=" + t0);
            Sayln("  Tuning completed in: " + Timing.ToSecondsString(timer.Report()) + " s");
            // NOTE(review): the method is declared to return double, but no return statement is visible here.
        /// <summary>
        /// Minimizes <paramref name="f"/> by repeated passes of per-coordinate gradient updates, optionally
        /// followed by a prior-specific shrinkage step (Lasso, Ridge or the grouped lasso variants).
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing has no braces and several short statements (else branches, try,
        /// break, counter updates) are not visible, so the nesting described in the inline comments is
        /// inferred from indentation only - confirm against the original file.
        /// </remarks>
        /// <param name="f">Function to minimize. Stochastic sampling is used when it is an <c>AbstractStochasticCachingDiffUpdateFunction</c>.</param>
        /// <param name="functionTolerance">Not read anywhere in the visible body.</param>
        /// <param name="initial">Starting point; copied into the field <c>x</c>.</param>
        /// <param name="maxIterations">Iteration cap; combined with <c>numPasses * numBatches</c> below.</param>
        /// <returns><c>xBest</c> when <c>useEvalImprovement</c> is set, otherwise <c>x</c>.</returns>
        /// <exception cref="NotSupportedException">
        /// Thrown for an unsupported prior/option combination, when a grouped prior is used with a function
        /// that is not an <c>IHasFeatureGrouping</c>, or when no iteration limit is available.
        /// </exception>
        public virtual double[] Minimize(IDiffFunction f, double functionTolerance, double[] initial, int maxIterations)
            int totalSamples = 0;

            Sayln("Using lambda=" + lambda);
            // For stochastic functions: select shuffled sampling and bring bSize into the range (0, totalSamples].
            if (f is AbstractStochasticCachingDiffUpdateFunction)
                AbstractStochasticCachingDiffUpdateFunction func = (AbstractStochasticCachingDiffUpdateFunction)f;
                func.sampleMethod = AbstractStochasticCachingDiffFunction.SamplingMethod.Shuffled;
                totalSamples      = func.DataDimension();
                if (bSize > totalSamples)
                    log.Info("WARNING: Total number of samples=" + totalSamples + " is smaller than requested batch size=" + bSize + "!!!");
                    bSize = totalSamples;
                    Sayln("Using batch size=" + bSize);
                if (bSize <= 0)
                    log.Info("WARNING: Requested batch size=" + bSize + " <= 0 !!!");
                    bSize = totalSamples;
                    Sayln("Using batch size=" + bSize);
            // Allocate the working point and the per-coordinate history arrays.
            x = new double[initial.Length];
            double[] testUpdateCache  = null;
            double[] currentRateCache = null;
            double[] bCache           = null;
            sumGradSquare = new double[initial.Length];
            prevGrad      = new double[initial.Length];
            prevDeltaX    = new double[initial.Length];
            if (useAdaDelta)
                sumDeltaXSquare = new double[initial.Length];
                if (prior != SGDWithAdaGradAndFOBOS.Prior.None && prior != SGDWithAdaGradAndFOBOS.Prior.Gaussian)
                    throw new NotSupportedException("useAdaDelta is currently only supported for Prior.NONE or Prior.GAUSSIAN");
            int[][] featureGrouping = null;
            // The update/rate caches are allocated for every prior except Lasso and None.
            if (prior != SGDWithAdaGradAndFOBOS.Prior.Lasso && prior != SGDWithAdaGradAndFOBOS.Prior.None)
                testUpdateCache  = new double[initial.Length];
                currentRateCache = new double[initial.Length];
            // NOTE(review): Prior.None also satisfies this condition, although the message below only
            // mentions the grouped lasso priors - confirm whether None is meant to require feature grouping.
            if (prior != SGDWithAdaGradAndFOBOS.Prior.Lasso && prior != SGDWithAdaGradAndFOBOS.Prior.Ridge && prior != SGDWithAdaGradAndFOBOS.Prior.Gaussian)
                if (!(f is IHasFeatureGrouping))
                    throw new NotSupportedException("prior is specified to be ae-lasso or g-lasso, but function does not support feature grouping");
                featureGrouping = ((IHasFeatureGrouping)f).GetFeatureGrouping();
            if (prior == SGDWithAdaGradAndFOBOS.Prior.sgLASSO)
                bCache = new double[initial.Length];
            System.Array.Copy(initial, 0, x, 0, x.Length);
            int numBatches = 1;

            // Integer division: samples that do not fill a whole batch are not counted.
            if (f is AbstractStochasticCachingDiffUpdateFunction)
                if (totalSamples > 0)
                    numBatches = totalSamples / bSize;
            bool have_max = (maxIterations > 0 || numPasses > 0);

            // NOTE(review): the assignment after the throw is presumably the 'else' branch; the keyword is not visible.
            if (!have_max)
                throw new NotSupportedException("No maximum number of iterations has been specified.");
                maxIterations = Math.Max(maxIterations, numPasses * numBatches);
            Sayln("       Batch size of: " + bSize);
            Sayln("       Data dimension of: " + totalSamples);
            Sayln("       Batches per pass through data:  " + numBatches);
            Sayln("       Number of passes is = " + numPasses);
            Sayln("       Max iterations is = " + maxIterations);
            //            Loop
            Timing total   = new Timing();
            Timing current = new Timing();

            // NOTE(review): no statement that increments iters is visible in this listing.
            int            iters       = 0;
            double         gValue      = 0;
            double         wValue      = 0;
            double         currentRate = 0;
            double         testUpdate  = 0;
            double         realUpdate  = 0;
            IList <double> values      = null;
            double         oldObjVal   = 0;

            for (int pass = 0; pass < numPasses; pass++)
                // Periodic evaluation: every evaluateIters-th pass, never on the first pass.
                bool   doEval    = (pass > 0 && evaluateIters > 0 && pass % evaluateIters == 0);
                double evalScore = double.NegativeInfinity;
                if (doEval)
                    evalScore = DoEvaluation(x);
                    // NOTE(review): no body is visible for this condition - presumably it leaves the pass loop.
                    if (useEvalImprovement && !ToContinue(x, evalScore))
                // TODO: currently objVal is only updated for GAUSSIAN prior
                // when other priors are used, objVal only reflects the un-regularized obj value
                double objVal   = double.NegativeInfinity;
                double objDelta = double.NegativeInfinity;
                Say("Iter: " + iters + " pass " + pass + " batch 1 ... ");
                // NOTE(review): no statement that updates these two counters is visible in this listing.
                int    numOfNonZero      = 0;
                int    numOfNonZeroGroup = 0;
                string gSizeStr          = string.Empty;
                for (int batch = 0; batch < numBatches; batch++)
                    //Get the next gradients
                    // log.info("getting gradients");
                    double[] gradients = null;
                    if (f is AbstractStochasticCachingDiffUpdateFunction)
                        AbstractStochasticCachingDiffUpdateFunction func = (AbstractStochasticCachingDiffUpdateFunction)f;
                        // Full batch: evaluate the objective at x and track its change since the previous evaluation.
                        if (bSize == totalSamples)
                            objVal    = func.ValueAt(x);
                            gradients = func.GetDerivative();
                            objDelta  = objVal - oldObjVal;
                            oldObjVal = objVal;
                            // NOTE(review): the list is created here, but no call that adds to it is visible.
                            if (values == null)
                                values = new List <double>();
                            // NOTE(review): presumably the 'else' (mini-batch) branch of the bSize test above.
                            func.CalculateStochasticGradient(x, bSize);
                            gradients = func.GetDerivative();
                        // NOTE(review): declares a second local named func, so this is presumably an 'else' branch
                        // of the outer stochastic-function test rather than nested inside it.
                        if (f is AbstractCachingDiffFunction)
                            AbstractCachingDiffFunction func = (AbstractCachingDiffFunction)f;
                            gradients = func.DerivativeAt(x);
                    // log.info("applying regularization");
                    // None / Gaussian: plain step x[index] - rate * gradient, with no shrinkage applied here.
                    if (prior == SGDWithAdaGradAndFOBOS.Prior.None || prior == SGDWithAdaGradAndFOBOS.Prior.Gaussian)
                        // Gaussian prior is also handled in objective
                        for (int index = 0; index < x.Length; index++)
                            gValue      = gradients[index];
                            currentRate = ComputeLearningRate(index, gValue);
                            // arrive at x(t+1/2)
                            wValue     = x[index];
                            testUpdate = wValue - (currentRate * gValue);
                            realUpdate = testUpdate;
                            UpdateX(x, index, realUpdate);
                        // x[index] = testUpdate;
                        // Lasso / Ridge: gradient step followed by a shrinkage step over the regularized parameters.
                        if (prior == SGDWithAdaGradAndFOBOS.Prior.Lasso || prior == SGDWithAdaGradAndFOBOS.Prior.Ridge)
                            double            testUpdateSquaredSum = 0;
                            ICollection <int> paramRange           = null;
                            // Use the function's own parameter range when it provides one.
                            // NOTE(review): the fallback below presumably sits in an 'else' and adds every index;
                            // neither the keyword nor the loop body is visible.
                            if (f is IHasRegularizerParamRange)
                                paramRange = ((IHasRegularizerParamRange)f).GetRegularizerParamRange(x);
                                paramRange = new HashSet <int>();
                                for (int i = 0; i < x.Length; i++)
                            foreach (int index in paramRange)
                                gValue      = gradients[index];
                                currentRate = ComputeLearningRate(index, gValue);
                                // arrive at x(t+1/2)
                                wValue     = x[index];
                                testUpdate = wValue - (currentRate * gValue);
                                double currentLambda = currentRate * lambda;
                                // apply FOBOS
                                // NOTE(review): Math.Signum is not a member of .NET System.Math (which offers Math.Sign);
                                // presumably 'Math' resolves to a port helper type here - confirm.
                                // Pospart is defined elsewhere; presumably it returns the positive part of its argument.
                                if (prior == SGDWithAdaGradAndFOBOS.Prior.Lasso)
                                    realUpdate = Math.Signum(testUpdate) * Pospart(Math.Abs(testUpdate) - currentLambda);
                                    UpdateX(x, index, realUpdate);
                                    // NOTE(review): no body is visible for this condition.
                                    if (realUpdate != 0)
                                    // Ridge: only cache the tentative update and rate; x is written after the norm is known.
                                    if (prior == SGDWithAdaGradAndFOBOS.Prior.Ridge)
                                        testUpdateSquaredSum   += testUpdate * testUpdate;
                                        testUpdateCache[index]  = testUpdate;
                                        currentRateCache[index] = currentRate;
                            // } else if (prior == Prior.GAUSSIAN) { // GAUSSIAN prior is assumed to be handled in the objective directly
                            //   realUpdate = testUpdate / (1 + currentLambda);
                            //   updateX(x, index, realUpdate);
                            //   // update objVal
                            //   objVal += currentLambda * wValue * wValue;
                            // Ridge: scale every cached update by a factor derived from the norm of all cached updates.
                            if (prior == SGDWithAdaGradAndFOBOS.Prior.Ridge)
                                double testUpdateNorm = Math.Sqrt(testUpdateSquaredSum);
                                for (int index_1 = 0; index_1 < testUpdateCache.Length; index_1++)
                                    realUpdate = testUpdateCache[index_1] * Pospart(1 - currentRateCache[index_1] * lambda / testUpdateNorm);
                                    UpdateX(x, index_1, realUpdate);
                                    // NOTE(review): no body is visible for this condition.
                                    if (realUpdate != 0)
                            // log.info("featureGroup.length: " + featureGrouping.length);
                            // Grouped priors: presumably the 'else' of the Lasso/Ridge test; one iteration per feature group.
                            foreach (int[] gFeatureIndices in featureGrouping)
                                // if (gIndex % 100 == 0) log.info(gIndex+" ");
                                double testUpdateSquaredSum = 0;
                                double testUpdateAbsSum     = 0;
                                double M  = gFeatureIndices.Length;
                                double dm = Math.Log(M);
                                // First sweep over the group: tentative updates plus their squared and absolute sums.
                                foreach (int index in gFeatureIndices)
                                    gValue      = gradients[index];
                                    currentRate = ComputeLearningRate(index, gValue);
                                    // arrive at x(t+1/2)
                                    wValue                  = x[index];
                                    testUpdate              = wValue - (currentRate * gValue);
                                    testUpdateSquaredSum   += testUpdate * testUpdate;
                                    testUpdateAbsSum       += Math.Abs(testUpdate);
                                    testUpdateCache[index]  = testUpdate;
                                    currentRateCache[index] = currentRate;
                                // gLASSO: scale the group's updates using the group norm and log(group size).
                                if (prior == SGDWithAdaGradAndFOBOS.Prior.gLASSO)
                                    double testUpdateNorm  = Math.Sqrt(testUpdateSquaredSum);
                                    bool   groupHasNonZero = false;
                                    foreach (int index_1 in gFeatureIndices)
                                        realUpdate = testUpdateCache[index_1] * Pospart(1 - currentRateCache[index_1] * lambda * dm / testUpdateNorm);
                                        UpdateX(x, index_1, realUpdate);
                                        if (realUpdate != 0)
                                            groupHasNonZero = true;
                                    // NOTE(review): no body is visible for this condition.
                                    if (groupHasNonZero)
                                    // aeLASSO: per-coordinate threshold tau built from the group's absolute sum.
                                    if (prior == SGDWithAdaGradAndFOBOS.Prior.aeLASSO)
                                        int  nonZeroCount    = 0;
                                        bool groupHasNonZero = false;
                                        foreach (int index_1 in gFeatureIndices)
                                            double tau = currentRateCache[index_1] * lambda / (1 + currentRateCache[index_1] * lambda * M) * testUpdateAbsSum;
                                            realUpdate = Math.Signum(testUpdateCache[index_1]) * Pospart(Math.Abs(testUpdateCache[index_1]) - tau);
                                            UpdateX(x, index_1, realUpdate);
                                            if (realUpdate != 0)
                                                groupHasNonZero = true;
                                        // NOTE(review): no body is visible for this condition.
                                        if (groupHasNonZero)
                                        // gSizeStr += nonZeroCount+",";
                                        // sgLASSO: per-coordinate thresholding weighted by alpha, then group scaling weighted by (1 - alpha).
                                        if (prior == SGDWithAdaGradAndFOBOS.Prior.sgLASSO)
                                            double bSquaredSum = 0;
                                            double b           = 0;
                                            foreach (int index_1 in gFeatureIndices)
                                                b = Math.Signum(testUpdateCache[index_1]) * Pospart(Math.Abs(testUpdateCache[index_1]) - currentRateCache[index_1] * alpha * lambda);
                                                bCache[index_1] = b;
                                                bSquaredSum    += b * b;
                                            double bNorm           = Math.Sqrt(bSquaredSum);
                                            int    nonZeroCount    = 0;
                                            bool   groupHasNonZero = false;
                                            foreach (int index_2 in gFeatureIndices)
                                                realUpdate = bCache[index_2] * Pospart(1 - currentRateCache[index_2] * (1.0 - alpha) * lambda * dm / bNorm);
                                                UpdateX(x, index_2, realUpdate);
                                                if (realUpdate != 0)
                                                    groupHasNonZero = true;
                                            // NOTE(review): no body is visible for this condition.
                                            if (groupHasNonZero)
                    // gSizeStr += nonZeroCount+",";
                    // log.info();
                    // update gradient and lastX
                    for (int index_3 = 0; index_3 < x.Length; index_3++)
                        prevGrad[index_3] = gradients[index_3];
                // if (hessSampleSize > 0) {
                //   approxHessian();
                // }
                    // NOTE(review): presumably the body of a 'try' whose keyword is not visible; the catch below
                    // overwrites every entry of x with NaN when the check fails.
                    ArrayMath.AssertFinite(x, "x");
                catch (ArrayMath.InvalidElementException e)
                    for (int i = 0; i < x.Length; i++)
                        x[i] = double.NaN;
                // Per-pass summary line; optional parts are appended only when their value was set.
                Sayln(numBatches.ToString() + ", n0-fCount:" + numOfNonZero + ((prior != SGDWithAdaGradAndFOBOS.Prior.Lasso && prior != SGDWithAdaGradAndFOBOS.Prior.Ridge) ? ", n0-gCount:" + numOfNonZeroGroup : string.Empty) + ((evalScore != double.NegativeInfinity
                                                                                                                                                                                                                                     ) ? ", evalScore:" + evalScore : string.Empty) + (objVal != double.NegativeInfinity ? ", obj_val:" + nf.Format(objVal) + ", obj_delta:" + objDelta : string.Empty));
                // Average-improvement stop: compares objVal with the value recorded up to 10 entries earlier.
                // NOTE(review): the three stop conditions below only log in this listing - presumably each one
                // also leaves the pass loop. The ratio below divides by objVal without a zero check.
                if (values != null && useAvgImprovement && iters > 5)
                    int    size               = values.Count;
                    double previousVal        = (size >= 10 ? values[size - 10] : values[0]);
                    double averageImprovement = (previousVal - objVal) / (size >= 10 ? 10 : size);
                    if (System.Math.Abs(averageImprovement / objVal) < Tol)
                        Sayln("Online Optmization completed, due to average improvement: | newest_val - previous_val | / |newestVal| < TOL ");
                if (iters >= maxIterations)
                    Sayln("Online Optimization complete.  Stopped after max iterations");
                if (total.Report() >= maxTime)
                    Sayln("Online Optimization complete.  Stopped after max time");
            if (evaluateIters > 0)
                // do final evaluation
                double evalScore = (useEvalImprovement ? DoEvaluation(xBest) : DoEvaluation(x));
                Sayln("final evalScore is: " + evalScore);
            Sayln("Completed in: " + Timing.ToSecondsString(total.Report()) + " s");
            return(useEvalImprovement ? xBest : x);
Ejemplo n.º 5
        /// <summary>
        /// Minimizes <paramref name="f"/> with stochastic updates, using the learning-rate schedule
        /// eta = 1 / (lambda * t) and a scalar scale factor for the L2 weight decay.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing has no braces and several short statements (else, try, break,
        /// return, counter updates) are not visible, so the nesting described in the inline comments is
        /// inferred from indentation only - confirm against the original file.
        /// </remarks>
        /// <param name="f">Function to minimize; must be an <c>AbstractStochasticCachingDiffUpdateFunction</c>.</param>
        /// <param name="functionTolerance">Not read anywhere in the visible body.</param>
        /// <param name="initial">Starting point; copied into the field <c>x</c>.</param>
        /// <param name="maxIterations">Iteration cap; combined with <c>numPasses</c> and <c>numBatches</c> below.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown when <paramref name="f"/> has the wrong type or when no iteration limit is available.
        /// </exception>
        public virtual double[] Minimize(IFunction f, double functionTolerance, double[] initial, int maxIterations)
            if (!(f is AbstractStochasticCachingDiffUpdateFunction))
                throw new NotSupportedException();
            AbstractStochasticCachingDiffUpdateFunction function = (AbstractStochasticCachingDiffUpdateFunction)f;
            int totalSamples   = function.DataDimension();
            int tuneSampleSize = Math.Min(totalSamples, tuningSamples);

            if (tuneSampleSize < tuningSamples)
                log.Info("WARNING: Total number of samples=" + totalSamples + " is smaller than requested tuning sample size=" + tuningSamples + "!!!");
            // The regularization strength is derived from sigma and the data size.
            lambda = 1.0 / (sigma * totalSamples);
            Sayln("Using sigma=" + sigma + " lambda=" + lambda + " tuning sample size " + tuneSampleSize);
            // The tuning call is disabled; t0 is computed from a fixed rate of 0.1 instead.
            // tune(function, initial, tuneSampleSize, 0.1);
            t0 = (int)(1 / (0.1 * lambda));
            x  = new double[initial.Length];
            System.Array.Copy(initial, 0, x, 0, x.Length);
            xscale = 1;
            xnorm  = GetNorm(x);
            // NOTE(review): bSize is not validated in this method; a value of 0 makes this integer division throw.
            int numBatches = totalSamples / bSize;

            bool have_max = (maxIterations > 0 || numPasses > 0);

            // NOTE(review): the assignment after the throw is presumably the 'else' branch; the keyword is not
            // visible. The formula multiplies the maximum by numBatches, unlike the IDiffFunction overload's
            // Math.Max(maxIterations, numPasses * numBatches) - confirm which one is intended.
            if (!have_max)
                throw new NotSupportedException("No maximum number of iterations has been specified.");
                maxIterations = Math.Max(maxIterations, numPasses) * numBatches;
            Sayln("       Batch size of: " + bSize);
            Sayln("       Data dimension of: " + totalSamples);
            Sayln("       Batches per pass through data:  " + numBatches);
            Sayln("       Number of passes is = " + numPasses);
            Sayln("       Max iterations is = " + maxIterations);
            //            Loop
            Timing total   = new Timing();
            Timing current = new Timing();
            // t starts at t0 and advances by bSize per batch.
            // NOTE(review): no statement that increments iters is visible in this listing.
            int    t       = t0;
            int    iters   = 0;

            for (int pass = 0; pass < numPasses; pass++)
                bool doEval = (pass > 0 && evaluateIters > 0 && pass % evaluateIters == 0);
                // NOTE(review): no body is visible for this condition.
                if (doEval)
                double totalValue = 0;
                double lastValue  = 0;
                for (int batch = 0; batch < numBatches; batch++)
                    //Get the next X
                    double eta  = 1 / (lambda * t);
                    double gain = eta / xscale;
                    lastValue   = function.CalculateStochasticUpdate(x, xscale, bSize, gain);
                    totalValue += lastValue;
                    // weight decay (for L2 regularization)
                    xscale *= (1 - eta * lambda * bSize);
                    t      += bSize;
                // NOTE(review): no body is visible for the small-scale test below. The AssertFinite call is
                // presumably the body of a 'try' whose keyword is not visible; the catch overwrites every
                // entry of x with NaN when the check fails.
                if (xscale < 1e-6)
                    ArrayMath.AssertFinite(x, "x");
                catch (ArrayMath.InvalidElementException e)
                    for (int i = 0; i < x.Length; i++)
                        x[i] = double.NaN;
                xnorm = GetNorm(x) * xscale * xscale;
                // Calculate loss based on L2 regularization
                double loss = totalValue + 0.5 * xnorm * lambda * totalSamples;
                Sayln("Iter: " + iters + " pass " + pass + " batch 1 ... " + numBatches.ToString() + " [" + (total.Report()) / 1000.0 + " s " + " {" + (current.Restart() / 1000.0) + " s}] " + lastValue + " " + totalValue + " " + loss);
                // NOTE(review): both stop conditions only log in this listing - presumably each one also
                // leaves the pass loop.
                if (iters >= maxIterations)
                    Sayln("Stochastic Optimization complete.  Stopped after max iterations");
                if (total.Report() >= maxTime)
                    Sayln("Stochastic Optimization complete.  Stopped after max time");
            // NOTE(review): no body is visible for the final-evaluation condition.
            if (evaluateIters > 0)
                // do final evaluation
            Sayln("Completed in: " + Timing.ToSecondsString(total.Report()) + " s");
            // NOTE(review): the method is declared to return double[], but no return statement is visible here.
Ejemplo n.º 6
 //This can be filled if an extending class needs to initialize things.
 protected internal virtual void Init(AbstractStochasticCachingDiffUpdateFunction func)