        /// <summary>
        /// Initialize weights by running SGD up to the specified tolerance.
        /// </summary>
        /// <param name="ch">The channel used for status messages.</param>
        /// <param name="cursorFactory">Factory that creates cursors over the labeled training examples.</param>
        /// <returns>The weight vector found by SGD, used as the initial point for the subsequent optimization.</returns>
        protected virtual VBuffer<float> InitializeWeightsSgd(IChannel ch, FloatLabelCursor.Factory cursorFactory)
        {
            if (!Quiet)
            {
                ch.Info("Running SGD initialization with tolerance {0}", SgdInitializationTolerance);
            }

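            // Termination test for the SGD loop: every 1000 examples, measure how far the
            // weights have moved since the previous check and stop once the norm of that
            // change falls below SgdInitializationTolerance.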
            int numExamples = 0;
            var oldWeights = VBufferUtils.CreateEmpty<float>(BiasCount + WeightCount);
            DTerminate terminateSgd =
                (in VBuffer<float> x) =>
                {
                    if (++numExamples % 1000 != 0)
                        return false;
                    VectorUtils.AddMult(in x, -1, ref oldWeights);
                    float normDiff = VectorUtils.Norm(oldWeights);
                    x.CopyTo(ref oldWeights);
                    // #if OLD_TRACING // REVIEW: How should this be ported?
                    if (!Quiet)
                    {
                        Console.Write(".");
                        if (numExamples % 50000 == 0)
                            Console.WriteLine("\t{0}\t{1}", numExamples, normDiff);
                    }
                    // #endif
                    return normDiff < SgdInitializationTolerance;
                };

            VBuffer<float> result = default(VBuffer<float>);
            FloatLabelCursor cursor = null;

            try
            {
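                // Scratch buffer reused across gradient evaluations (passed by ref to
                // AccumulateOneGradient, presumably as working space).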
                float[] scratch = null;

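                // Stochastic gradient callback: each call clears the gradient, then accumulates
                // the gradient of the loss on the next example from the cursor, recreating the
                // cursor when the data is exhausted so SGD can make multiple passes.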
                SgdOptimizer.DStochasticGradient lossSgd =
                    (in VBuffer<float> x, ref VBuffer<float> grad) =>
                    {
                        // Zero out the gradient by sparsifying.
                        grad = new VBuffer<float>(grad.Length, 0, grad.Values, grad.Indices);
                        EnsureBiases(ref grad);

                        if (cursor == null || !cursor.MoveNext())
                        {
                            if (cursor != null)
                                cursor.Dispose();
                            cursor = cursorFactory.Create();
                            if (!cursor.MoveNext())
                                return;
                        }
                        AccumulateOneGradient(in cursor.Features, cursor.Label, cursor.Weight, in x, ref grad, ref scratch);
                    };

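                // Start SGD from all-zero weights: a dense buffer when DenseOptimizer is set,
                // otherwise an empty (implicitly zero) sparse buffer.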
                VBuffer<float> sgdWeights;
                if (DenseOptimizer)
                    sgdWeights = VBufferUtils.CreateDense<float>(BiasCount + WeightCount);
                else
                    sgdWeights = VBufferUtils.CreateEmpty<float>(BiasCount + WeightCount);
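                // Run SGD with the termination test defined above; the minimizer writes the
                // final weights into 'result', which this method returns as the initial point.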
                SgdOptimizer sgdo = new SgdOptimizer(terminateSgd);
                sgdo.Minimize(lossSgd, ref sgdWeights, ref result);
                // #if OLD_TRACING // REVIEW: How should this be ported?
                if (!Quiet)
                {
                    Console.WriteLine();
                }
                // #endif
                ch.Info("SGD initialization done in {0} rounds", numExamples);
            }
            finally
            {
                if (cursor != null)
                {
                    cursor.Dispose();
                }
            }

            return result;
        }