Example #1
        /// <summary>
        ///     A common feature of the above techniques—indeed, the key technique that
        ///     allows us to track the decayed weights efficiently—is that they maintain
        ///     counts and other quantities based on g(ti − L), and only scale by g(t − L)
        ///     at query time. But while g(ti −L)/g(t−L) is guaranteed to lie between zero
        ///     and one, the intermediate values of g(ti − L) could become very large. For
        ///     polynomial functions, these values should not grow too large, and should be
        ///     effectively represented in practice by floating point values without loss of
        ///     precision. For exponential functions, these values could grow quite large as
        ///     new values of (ti − L) become large, and potentially exceed the capacity of
        ///     common floating point types. However, since the values stored by the
        ///     algorithms are linear combinations of g values (scaled sums), they can be
        ///     rescaled relative to a new landmark. That is, by the analysis of exponential
        ///     decay in Section III-A, the choice of L does not affect the final result. We
        ///     can therefore multiply each value based on L by a factor of exp(−α(L′ − L)),
        ///     and obtain the correct value as if we had instead computed relative to a new
        ///     landmark L′ (and then use this new L′ at query time). This can be done with
        ///     a linear pass over whatever data structure is being used."
        /// </summary>
        public void Rescale()
        {
            var lockTaken = false;

            try
            {
                _lock.Enter(ref lockTaken);
                var oldStartTime = _startTime.GetValue();
                _startTime.SetValue(_clock.Seconds);

                var scalingFactor = Math.Exp(-_alpha * (_startTime.GetValue() - oldStartTime));

                var keys = new List<double>(_values.Keys);
                foreach (var key in keys)
                {
                    var sample = _values[key];
                    _values.Remove(key);
                    var newKey    = key * scalingFactor;
                    var newSample = new WeightedSample(sample.Value, sample.UserValue, sample.Weight * scalingFactor);
                    _values[newKey] = newSample;
                }

                // make sure the counter is in sync with the number of stored samples.
                _count.SetValue(_values.Count);
            }
            finally
            {
                if (lockTaken)
                {
                    _lock.Exit();
                }
            }
        }
        private void Update(long value, string userValue, long timestamp)
        {
            Logger.Debug("Updating {Reservoir}", this);
            var lockTaken = false;

            try
            {
                _lock.Enter(ref lockTaken);

                Logger.Trace("Lock entered for reservoir update");

                var itemWeight = Math.Exp(_alpha * (timestamp - _startTime.GetValue()));
                var sample     = new WeightedSample(value, userValue, itemWeight);

                var random = 0.0;

                // Prevent division by 0
                while (random.Equals(.0))
                {
                    random = ThreadLocalRandom.NextDouble();
                }

                var priority = itemWeight / random;

                var newCount = _count.GetValue();
                newCount++;
                _count.SetValue(newCount);
                _sum.Add(value);

                if (newCount <= _sampleSize)
                {
                    _values[priority] = sample;
                }
                else
                {
                    // The sample list is sorted by priority in descending order (see the
                    // ReverseOrderDoubleComparer in Example #3), so the last key is the
                    // lowest-priority sample currently retained.
                    var first = _values.Keys[_values.Count - 1];
                    if (first < priority)
                    {
                        _values.Remove(first);
                        _values[priority] = sample;
                    }
                }
            }
            finally
            {
                if (lockTaken)
                {
                    _lock.Exit();
                    Logger.Trace("Lock exited after updating {Reservoir}", this);
                }

                Logger.Debug("{Reservoir} updated", this);
            }
        }
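
The summary quoted above argues that the stored, landmark-relative weights can be shifted to a new landmark L' by multiplying each one by exp(-alpha * (L' - L)), without changing what a query would return. The following self-contained sketch checks that identity numerically; the alpha value, landmarks, and sample timestamp are arbitrary illustration values and are not taken from the library.

using System;

internal static class RescaleIdentitySketch
{
    public static void Main()
    {
        const double alpha = 0.015;      // decay factor, illustrative only
        const double oldLandmark = 0.0;  // L, in seconds
        const double newLandmark = 60.0; // L', in seconds
        const double sampleTime = 10.0;  // ti, in seconds

        // Weight as stored while L was the landmark: exp(alpha * (ti - L)).
        var storedWeight = Math.Exp(alpha * (sampleTime - oldLandmark));

        // Rescale with the factor exp(-alpha * (L' - L)), as Rescale() does above.
        var rescaledWeight = storedWeight * Math.Exp(-alpha * (newLandmark - oldLandmark));

        // Weight that would have been stored had L' been the landmark from the start.
        var directWeight = Math.Exp(alpha * (sampleTime - newLandmark));

        // The two agree up to floating point error, so the choice of landmark
        // does not affect query-time results.
        Console.WriteLine($"rescaled: {rescaledWeight:G17}");
        Console.WriteLine($"direct:   {directWeight:G17}");
        Console.WriteLine($"match:    {Math.Abs(rescaledWeight - directWeight) < 1e-12}");
    }
}
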
Example #3
        /// <summary>
        ///     A common feature of the above techniques—indeed, the key technique that
        ///     allows us to track the decayed weights efficiently—is that they maintain
        ///     counts and other quantities based on g(ti − L), and only scale by g(t − L)
        ///     at query time. But while g(ti −L)/g(t−L) is guaranteed to lie between zero
        ///     and one, the intermediate values of g(ti − L) could become very large. For
        ///     polynomial functions, these values should not grow too large, and should be
        ///     effectively represented in practice by floating point values without loss of
        ///     precision. For exponential functions, these values could grow quite large as
        ///     new values of (ti − L) become large, and potentially exceed the capacity of
        ///     common floating point types. However, since the values stored by the
        ///     algorithms are linear combinations of g values (scaled sums), they can be
        ///     rescaled relative to a new landmark. That is, by the analysis of exponential
        ///     decay in Section III-A, the choice of L does not affect the final result. We
        ///     can therefore multiply each value based on L by a factor of exp(−α(L′ − L)),
        ///     and obtain the correct value as if we had instead computed relative to a new
        ///     landmark L′ (and then use this new L′ at query time). This can be done with
        ///     a linear pass over whatever data structure is being used."
        /// </summary>
        public void Rescale()
        {
            var lockTaken = false;

            Logger.Trace("Rescaling {Reservoir}", this);

            try
            {
                _lock.Enter(ref lockTaken);

                Logger.Trace("Lock entered for rescaling {Reservoir}", this);

                var oldStartTime = _startTime.GetValue();
                _startTime.SetValue(_clock.Seconds);

                var scalingFactor = Math.Exp(-_alpha * (_startTime.GetValue() - oldStartTime));
                var newSamples    = new Dictionary<double, WeightedSample>(_values.Count);

                foreach (var keyValuePair in _values)
                {
                    var sample = keyValuePair.Value;

                    var newWeight = sample.Weight * scalingFactor;
                    if (newWeight < _minimumSampleWeight)
                    {
                        continue;
                    }

                    var newKey    = keyValuePair.Key * scalingFactor;
                    var newSample = new WeightedSample(sample.Value, sample.UserValue, newWeight);
                    newSamples[newKey] = newSample;
                }

                _values          = new SortedList<double, WeightedSample>(newSamples, ReverseOrderDoubleComparer.Instance);
                _values.Capacity = _sampleSize;

                // make sure the counter is in sync with the number of stored samples.
                _count.SetValue(_values.Count);
                _sum.SetValue(_values.Values.Sum(sample => sample.Value));
            }
            finally
            {
                if (lockTaken)
                {
                    _lock.Exit();
                    Logger.Trace("Lock exited after rescaling {Reservoir}", this);
                }

                Logger.Trace("{Reservoir} rescaled", this);
            }
        }
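
Example #3 differs from Example #1 in that it rebuilds the sample list instead of mutating it in place, drops samples whose rescaled weight has fallen below _minimumSampleWeight, and resynchronises both _count and _sum afterwards. To show the whole technique end to end, here is a compact, self-contained sketch that combines the weighted insertion from Update() with a pruning Rescale() like the one above. The class name, members, and threshold are invented for the sketch and are not the library's API.

using System;
using System.Collections.Generic;
using System.Linq;

internal sealed class DecayingReservoirSketch
{
    private readonly int _sampleSize;
    private readonly double _alpha;
    private readonly Random _random = new Random();

    // Keyed by priority in ascending order, so the first key is the lowest priority.
    // (The examples above keep the keys in descending order via ReverseOrderDoubleComparer
    // and therefore look at the last key instead.)
    private SortedList<double, (long Value, double Weight)> _samples =
        new SortedList<double, (long Value, double Weight)>();

    private double _startTime; // the landmark L, in seconds

    public DecayingReservoirSketch(int sampleSize, double alpha, double startTimeSeconds)
    {
        _sampleSize = sampleSize;
        _alpha = alpha;
        _startTime = startTimeSeconds;
    }

    public void Update(long value, double timestampSeconds)
    {
        // Forward decay: the weight grows with time since the landmark, so newer samples
        // are exponentially more likely to be retained.
        var weight = Math.Exp(_alpha * (timestampSeconds - _startTime));

        // Priority = weight / U with U uniform in (0, 1]; keeping the largest priorities
        // yields a weighted random sample, as in Update() above.
        double u;
        do
        {
            u = _random.NextDouble();
        } while (u == 0.0);

        var priority = weight / u;

        if (_samples.Count < _sampleSize)
        {
            _samples[priority] = (value, weight);
        }
        else if (_samples.Keys[0] < priority)
        {
            // Evict the lowest-priority sample in favour of the new, higher-priority one.
            _samples.RemoveAt(0);
            _samples[priority] = (value, weight);
        }
    }

    // Move the landmark from L to L', scale every stored key and weight by
    // exp(-alpha * (L' - L)), and drop samples whose rescaled weight is negligible.
    // The default threshold is an arbitrary illustration value.
    public void Rescale(double newStartTimeSeconds, double minimumSampleWeight = 1e-12)
    {
        var scalingFactor = Math.Exp(-_alpha * (newStartTimeSeconds - _startTime));
        _startTime = newStartTimeSeconds;

        var rescaled = new SortedList<double, (long Value, double Weight)>(_samples.Count);
        foreach (var pair in _samples)
        {
            var newWeight = pair.Value.Weight * scalingFactor;
            if (newWeight >= minimumSampleWeight)
            {
                rescaled[pair.Key * scalingFactor] = (pair.Value.Value, newWeight);
            }
        }

        _samples = rescaled;
    }

    public IReadOnlyList<long> Snapshot() => _samples.Values.Select(s => s.Value).ToList();
}

Calling Rescale() periodically keeps the stored weights and priorities small, which is exactly the overflow concern the quoted summary raises for exponential decay functions.
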