/// <summary>
        /// Evaluates the function over all indices by queuing one work item per chunk on the
        /// thread pool, waiting for every chunk to finish, and summing the per-chunk results.
        /// </summary>
        /// <param name="input">The point at which to evaluate the function</param>
        /// <param name="gradient">Output: the sum of the per-chunk gradients. It is scaled by zero before anything is accumulated into it.</param>
        /// <returns>The sum of the per-chunk function values</returns>
        public Float Eval(ref VBuffer <Float> input, ref VBuffer <Float> gradient)
        {
            // Publish the evaluation point through the field that the chunk workers read.
            _input = input;

            // Queue one work item per chunk; the chunk index is passed as the state object.
            int queued = 0;
            while (queued < _threads)
            {
                ThreadPool.QueueUserWorkItem(Eval, queued);
                queued++;
            }

            // Block until every event in _threadFinished has been signaled.
            WaitHandle.WaitAll(_threadFinished);

            Float total = 0;
            VectorUtils.ScaleBy(ref gradient, 0);

            // Fold the per-chunk partial results together in ascending chunk order.
            for (int chunk = 0; chunk < _threads; chunk++)
            {
                if (gradient.Length != 0)
                {
                    VectorUtils.Add(ref _tempGrads[chunk], ref gradient);
                }
                else
                {
                    // The output has zero length, so take this chunk's gradient wholesale.
                    _tempGrads[chunk].CopyTo(ref gradient);
                }

                total += _tempVals[chunk];
            }

            return total;
        }
        /// <summary>
        /// Evaluates one chunk of the index range and stores its partial value and gradient
        /// in the slots for that chunk, then signals the chunk's completion event.
        /// </summary>
        /// <param name="chunkIndexObj">The boxed <c>int</c> index of the chunk to evaluate</param>
        private void Eval(object chunkIndexObj)
        {
            int chunkIndex = (int)chunkIndexObj;

            // _maxIndex indices are split into _threads contiguous chunks. The first
            // `remainder` chunks hold one extra index so that every index is covered.
            int baseSize  = _maxIndex / _threads;
            int largeSize = baseSize + 1;
            int remainder = _maxIndex % _threads;
            bool isLarge  = chunkIndex < remainder;

            int start = isLarge
                ? largeSize * chunkIndex
                : largeSize * remainder + baseSize * (chunkIndex - remainder);
            int end = start + (isLarge ? largeSize : baseSize);

            // Reset this chunk's accumulators before summing into them.
            _tempVals[chunkIndex] = 0;
            VectorUtils.ScaleBy(ref _tempGrads[chunkIndex], 0);

            VBuffer <Float> scratch = default(VBuffer <Float>);

            int index = start;
            while (index < end)
            {
                // Hand the function a zero-length buffer that reuses the previous arrays.
                scratch = new VBuffer <Float>(0, 0, scratch.Values, scratch.Indices);
                _tempVals[chunkIndex] += _func(index, ref _input, ref scratch);

                if (_tempGrads[chunkIndex].Length != 0)
                {
                    VectorUtils.Add(ref scratch, ref _tempGrads[chunkIndex]);
                }
                else
                {
                    // The accumulator has zero length, so take this gradient wholesale.
                    scratch.CopyTo(ref _tempGrads[chunkIndex]);
                }

                index++;
            }

            // Signal that this chunk's results are ready.
            _threadFinished[chunkIndex].Set();
        }