/// <summary>
/// "A common feature of the above techniques—indeed, the key technique that
/// allows us to track the decayed weights efficiently—is that they maintain
/// counts and other quantities based on g(ti − L), and only scale by g(t − L)
/// at query time. But while g(ti − L)/g(t − L) is guaranteed to lie between zero
/// and one, the intermediate values of g(ti − L) could become very large. For
/// polynomial functions, these values should not grow too large, and should be
/// effectively represented in practice by floating point values without loss of
/// precision. For exponential functions, these values could grow quite large as
/// new values of (ti − L) become large, and potentially exceed the capacity of
/// common floating point types. However, since the values stored by the
/// algorithms are linear combinations of g values (scaled sums), they can be
/// rescaled relative to a new landmark. That is, by the analysis of exponential
/// decay in Section III-A, the choice of L does not affect the final result. We
/// can therefore multiply each value based on L by a factor of exp(−α(L′ − L)),
/// and obtain the correct value as if we had instead computed relative to a new
/// landmark L′ (and then use this new L′ at query time). This can be done with
/// a linear pass over whatever data structure is being used."
/// </summary>
public void Rescale()
{
    var lockTaken = false;

    try
    {
        _lock.Enter(ref lockTaken);

        var oldStartTime = _startTime.GetValue();
        _startTime.SetValue(_clock.Seconds);

        // exp(-alpha * (L' - L)): the factor that moves every stored weight from the old
        // landmark to the new one.
        var scalingFactor = Math.Exp(-_alpha * (_startTime.GetValue() - oldStartTime));

        var keys = new List<double>(_values.Keys);

        foreach (var key in keys)
        {
            var sample = _values[key];
            _values.Remove(key);

            // Scale the priority key and the sample weight by the same factor so both become
            // relative to the new landmark.
            var newKey = key * scalingFactor;
            var newSample = new WeightedSample(sample.Value, sample.UserValue, sample.Weight * scalingFactor);
            _values[newKey] = newSample;
        }

        // make sure the counter is in sync with the number of stored samples.
        _count.SetValue(_values.Count);
    }
    finally
    {
        if (lockTaken)
        {
            _lock.Exit();
        }
    }
}
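// Illustrative sketch (not part of the reservoir implementation): a quick numeric check of the
// landmark-shift argument quoted above. A sample recorded at time ti against landmark L carries
// the stored weight exp(alpha * (ti - L)) and is normalised by exp(alpha * (t - L)) at query
// time t. Shifting to a new landmark L' multiplies the stored weight by exp(-alpha * (L' - L));
// normalising by exp(alpha * (t - L')) then recovers exactly the same decayed value. All names
// and constants below are hypothetical and exist only for this demonstration.
private static void DemonstrateLandmarkInvariance()
{
    const double alpha = 0.015;
    const double ti = 100.0;     // sample timestamp
    const double t = 600.0;      // query timestamp
    const double l = 0.0;        // old landmark L
    const double lPrime = 300.0; // new landmark L'

    var storedWeight = Math.Exp(alpha * (ti - l));
    var decayedAtQuery = storedWeight / Math.Exp(alpha * (t - l));

    var rescaledWeight = storedWeight * Math.Exp(-alpha * (lPrime - l));
    var decayedAfterRescale = rescaledWeight / Math.Exp(alpha * (t - lPrime));

    // Both values equal exp(-alpha * (t - ti)); the choice of landmark does not affect the result.
    System.Diagnostics.Debug.Assert(Math.Abs(decayedAtQuery - decayedAfterRescale) < 1e-12);
}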
private void Update(long value, string userValue, long timestamp)
{
    Logger.Debug("Updating {Reservoir}", this);

    var lockTaken = false;

    try
    {
        _lock.Enter(ref lockTaken);
        Logger.Trace("Lock entered for reservoir update");

        // Forward-decay weight relative to the current landmark (_startTime): exp(alpha * (ti - L)).
        var itemWeight = Math.Exp(_alpha * (timestamp - _startTime.GetValue()));
        var sample = new WeightedSample(value, userValue, itemWeight);

        var random = 0.0;

        // Prevent division by 0
        while (random.Equals(0.0))
        {
            random = ThreadLocalRandom.NextDouble();
        }

        var priority = itemWeight / random;

        var newCount = _count.GetValue();
        newCount++;
        _count.SetValue(newCount);
        _sum.Add(value);

        if (newCount <= _sampleSize)
        {
            _values[priority] = sample;
        }
        else
        {
            // Keys are sorted in descending order, so the last key is the lowest-priority sample
            // currently retained; evict it only if the new sample ranks higher.
            var first = _values.Keys[_values.Count - 1];

            if (first < priority)
            {
                _values.Remove(first);
                _values[priority] = sample;
            }
        }
    }
    finally
    {
        if (lockTaken)
        {
            _lock.Exit();
            Logger.Trace("Lock exited after updating {Reservoir}", this);
        }

        Logger.Debug("{Reservoir} updated", this);
    }
}
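// Illustrative sketch (not part of the reservoir implementation): the update above performs
// weighted reservoir sampling by giving each incoming item the priority weight / U, where U is
// uniform on (0, 1], and keeping only the _sampleSize items with the largest priorities, so
// heavier (more recent, less decayed) items are more likely to survive eviction. The helper
// below is a hypothetical standalone version of that selection step, written only to show the
// mechanism in isolation.
private static IList<long> SampleByPriority(
    IEnumerable<(long Value, double Weight)> items, int sampleSize, Random random)
{
    // Sorted descending by priority so the last entry is always the weakest retained candidate.
    var reservoir = new SortedList<double, long>(Comparer<double>.Create((x, y) => y.CompareTo(x)));

    foreach (var (value, weight) in items)
    {
        var u = 1.0 - random.NextDouble(); // uniform on (0, 1], avoids division by zero
        var priority = weight / u;

        if (reservoir.Count < sampleSize)
        {
            reservoir[priority] = value;
        }
        else
        {
            var weakest = reservoir.Keys[reservoir.Count - 1];

            if (weakest < priority)
            {
                reservoir.Remove(weakest);
                reservoir[priority] = value;
            }
        }
    }

    return reservoir.Values;
}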
/// <summary>
/// "A common feature of the above techniques—indeed, the key technique that
/// allows us to track the decayed weights efficiently—is that they maintain
/// counts and other quantities based on g(ti − L), and only scale by g(t − L)
/// at query time. But while g(ti − L)/g(t − L) is guaranteed to lie between zero
/// and one, the intermediate values of g(ti − L) could become very large. For
/// polynomial functions, these values should not grow too large, and should be
/// effectively represented in practice by floating point values without loss of
/// precision. For exponential functions, these values could grow quite large as
/// new values of (ti − L) become large, and potentially exceed the capacity of
/// common floating point types. However, since the values stored by the
/// algorithms are linear combinations of g values (scaled sums), they can be
/// rescaled relative to a new landmark. That is, by the analysis of exponential
/// decay in Section III-A, the choice of L does not affect the final result. We
/// can therefore multiply each value based on L by a factor of exp(−α(L′ − L)),
/// and obtain the correct value as if we had instead computed relative to a new
/// landmark L′ (and then use this new L′ at query time). This can be done with
/// a linear pass over whatever data structure is being used."
/// </summary>
public void Rescale()
{
    var lockTaken = false;
    Logger.Trace("Rescaling {Reservoir}", this);

    try
    {
        _lock.Enter(ref lockTaken);
        Logger.Trace("Lock entered for rescaling {Reservoir}", this);

        var oldStartTime = _startTime.GetValue();
        _startTime.SetValue(_clock.Seconds);

        // exp(-alpha * (L' - L)): the factor that moves every stored weight from the old
        // landmark to the new one.
        var scalingFactor = Math.Exp(-_alpha * (_startTime.GetValue() - oldStartTime));

        var newSamples = new Dictionary<double, WeightedSample>(_values.Count);

        foreach (var keyValuePair in _values)
        {
            var sample = keyValuePair.Value;
            var newWeight = sample.Weight * scalingFactor;

            // Drop samples whose rescaled weight has decayed below the minimum threshold.
            if (newWeight < _minimumSampleWeight)
            {
                continue;
            }

            var newKey = keyValuePair.Key * scalingFactor;
            var newSample = new WeightedSample(sample.Value, sample.UserValue, newWeight);
            newSamples[newKey] = newSample;
        }

        _values = new SortedList<double, WeightedSample>(newSamples, ReverseOrderDoubleComparer.Instance);
        _values.Capacity = _sampleSize;

        // make sure the counter is in sync with the number of stored samples.
        _count.SetValue(_values.Count);
        _sum.SetValue(_values.Values.Sum(sample => sample.Value));
    }
    finally
    {
        if (lockTaken)
        {
            _lock.Exit();
            Logger.Trace("Lock exited after rescaling {Reservoir}", this);
        }

        Logger.Trace("{Reservoir} rescaled", this);
    }
}
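// Illustrative sketch (not part of the reservoir implementation): Rescale matters because the
// stored weights exp(alpha * (ti - L)) grow without bound as time moves away from the landmark L,
// and Math.Exp overflows a double once its argument exceeds roughly 709 (double.MaxValue is about
// 1.8e308). A hypothetical driver like the one below would shift the landmark on a fixed period,
// long before that happens. The method name and period are assumptions made for this sketch, not
// the reservoir's actual API; _clock, _startTime and Rescale() are the members used above.
private void RescaleIfDue()
{
    const long rescalePeriodSeconds = 60 * 60; // e.g. shift the landmark once per hour

    if (_clock.Seconds - _startTime.GetValue() >= rescalePeriodSeconds)
    {
        Rescale();
    }
}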