Example #1
        public void Reference_equality_should_be_correct()
        {
            var first  = new WeightedSample(1, null, 1);
            var second = new WeightedSample(2, null, 1);

            Assert.False(first.Equals((object)second));
        }
Example #2
        public void Hash_codes_differ_between_instances()
        {
            var first  = new WeightedSample(1, null, 1).GetHashCode();
            var second = new WeightedSample(2, null, 1).GetHashCode();

            Assert.NotEqual(first, second);
        }
Example #3
        public void Hash_codes_same_for_same_instance()
        {
            var first  = new WeightedSample(1, null, 1);
            var second = first;

            Assert.Equal(first.GetHashCode(), second.GetHashCode());
        }
Example #4
        public void Can_determine_if_weighted_samples_are_same()
        {
            var first  = new WeightedSample(1, null, 1);
            var second = new WeightedSample(1, null, 1);

            first.Should().Be(second);
        }
Example #5
        public void Can_determine_if_weighted_samples_are_same_using_operator()
        {
            var first  = new WeightedSample(1, null, 1);
            var second = new WeightedSample(1, null, 1);

            Assert.True(first == second);
        }
Example #6
 // ReSharper disable MemberCanBePrivate.Global
 public bool Equals(WeightedSample other)
 // ReSharper restore MemberCanBePrivate.Global
 {
     // ReSharper disable ImpureMethodCallOnReadonlyValueField
     return string.Equals(UserValue, other.UserValue) && Value == other.Value && Weight.Equals(other.Weight);
     // ReSharper restore ImpureMethodCallOnReadonlyValueField
 }
Example #7
        public void CookieJar_Example_Weighted(int total, int choc1, int choc2, int expectedNumerator, int expectedDenominator)
        {
            // Given we have 2 jars
            // jar one contains 10 choc and 30 vanilla
            // jar two contains 20 choc and 20 vanilla
            // what is the probability of drawing a 
            // vanilla cookie from jar 1

            var sample1 = new WeightedSample<Cookie>("Jar1");
            var sample2 = new WeightedSample<Cookie>("Jar2");
            var hypos = new HypoSet<Cookie>("All");

            hypos.Add(sample1, sample2);

            var chocx = new Cookie() { F = 'C' };
            var vanix = new Cookie() { F = 'V' };

            var choc = It.Is(chocx);
            var vani = It.Is(vanix);

            sample1[chocx] = choc1;
            sample1[vanix] = total - choc1;
            sample2[chocx] = choc2;
            sample2[vanix] = total - choc2;

            sample1.ProbabilityOfEvent(choc);
            sample2.ProbabilityOfEvent(choc);

            var postProb = hypos.PosterierProbability(sample1, vani);

            Assert.That(postProb.Numerator, Is.EqualTo(expectedNumerator));
            Assert.That(postProb.Denominator, Is.EqualTo(expectedDenominator));
        }
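For the concrete numbers in the comment above (jar 1 holds 10 chocolate and 30 vanilla cookies, jar 2 holds 20 of each), the posterior the test asserts can be checked by hand with Bayes' theorem: the probability that a vanilla draw came from jar 1 is 3/5. The sketch below is a plain-double hand check only and does not use the WeightedSample/HypoSet API under test:

        // Hand check of P(Jar1 | vanilla) via Bayes' theorem, independent of the library under test.
        double pVanillaGivenJar1 = 30.0 / 40.0; // jar 1: 10 chocolate, 30 vanilla
        double pVanillaGivenJar2 = 20.0 / 40.0; // jar 2: 20 chocolate, 20 vanilla
        double prior             = 0.5;         // both jars equally likely before the draw

        double posterior = pVanillaGivenJar1 * prior
                           / (pVanillaGivenJar1 * prior + pVanillaGivenJar2 * prior);

        Console.WriteLine(posterior);           // 0.6, i.e. the fraction 3/5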
Example #8
        public void Rescale()
        {
            Verify.NotDisposed(!disposed_, ReservoirDisposedMessage);

            ExecuteAsCriticalSection(() => {
                var oldStartTime = startTime_;
                startTime_       = clock_.Seconds;

                var scalingFactor = Math.Exp(-alpha_ * (startTime_ - oldStartTime));

                var newSamples = new Dictionary<double, WeightedSample>(values_.Count);

                foreach (var keyValuePair in values_)
                {
                    var sample = keyValuePair.Value;

                    var newWeight = sample.Weight * scalingFactor;

                    // Drop samples whose decayed weight has fallen below the threshold
                    if (newWeight < sampleWeightThreshold_)
                    {
                        continue;
                    }

                    var newKey    = keyValuePair.Key * scalingFactor;
                    var newSample = new WeightedSample(sample.Value, sample.UserValue, newWeight);
                    newSamples.Add(newKey, newSample);
                }

                values_ = new SortedList<double, WeightedSample>(newSamples, ReverseOrderDoubleComparer.Instance);

                // Need to reset the samples counter after rescaling
                count_ = values_.Count;
                sum_   = values_.Values.Aggregate(0L, (current, sample) => current + sample.Value);
            });
        }
Example #9
        public void can_determine_if_weighted_samples_are_diff()
        {
            var first  = new WeightedSample(1, null, 1);
            var second = new WeightedSample(1, null, 2);

            first.Should().NotBe(second);
        }
Example #10
        private void Update(long value, string userValue, long timestamp)
        {
            Verify.NotDisposed(!disposed_, ReservoirDisposedMessage);

            var itemWeight = Math.Exp(alpha_ * (timestamp - startTime_));
            var sample     = new WeightedSample(value, userValue, itemWeight);

            var random = 0.0;

            // Prevent division by 0
            // TODO: what about underflow?
            while (random.Equals(0.0))
            {
                random = ThreadLocalRandom.NextDouble();
            }

            var priority = itemWeight / random;

            ExecuteAsCriticalSection(() => {
                count_++;
                sum_ += value;

                if (count_ <= sampleSize_)
                {
                    values_[priority] = sample;
                }
                else
                {
                    // values_ is sorted by descending priority (ReverseOrderDoubleComparer),
                    // so the last key is the lowest-priority sample currently retained
                    var first = values_.Keys[values_.Count - 1];
                    if (first < priority)
                    {
                        values_.Remove(first);
                        values_[priority] = sample;
                    }
                }
            });
        }
Example #11
 public bool Equals(WeightedSample other)
 {
     return string.Equals(UserValue, other.UserValue) && Value == other.Value && Weight.Equals(other.Weight);
 }
        /// <summary>
        /// Adds an old value with a fixed timestamp to the reservoir.
        /// </summary>
        /// <param name="value">the value to be added</param>
        /// <param name="timestamp">the epoch timestamp of value in seconds</param>
        public void Update(long value, long timestamp)
        {
            rescaleIfNeeded();
            lockForRegularUsage();
            _lock.EnterReadLock();
            try
            {
                var itemWeight = Weight(timestamp - _startTime);
                WeightedSample sample = new WeightedSample(value, itemWeight);
                var random = ThreadLocalRandom.NextNonzeroDouble();
                var priority = itemWeight / random;

                var newCount = _count.IncrementAndGet();

                if (newCount <= _size)
                {
                    _values.AddOrUpdate(priority, sample, (p, v) => v);
                }
                else
                {
                    var first = _values.Keys.Min();
                    if (first < priority)
                    {
                        _values.AddOrUpdate(priority, sample, (p, v) => v);

                        WeightedSample removed;
                        while (!_values.TryRemove(first, out removed))
                        {
                            first = _values.Keys.First();
                        }
                    }
                }
            }
            finally
            {
                unlockForRegularUsage();
                _lock.ExitReadLock();
            }
        }
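The XML comment above is the whole contract of this overload: the caller supplies the value together with its epoch timestamp in seconds instead of letting the reservoir read its own clock, which is how old readings get back-filled. A hypothetical call site is sketched below; the reservoir variable is assumed to be an already constructed instance of the enclosing type and is not defined anywhere in the code above:

        // Back-fill a reading observed 60 seconds ago; timestamps are epoch seconds.
        // 'reservoir' is assumed to be an existing instance exposing this Update overload.
        long nowSeconds = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
        reservoir.Update(42L, nowSeconds - 60);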
        /// <summary>
        /// "A common feature of the above techniques—indeed, the key technique that
        /// allows us to track the decayed weights efficiently—is that they maintain
        /// counts and other quantities based on g(ti − L), and only scale by g(t − L)
        /// at query time. But while g(ti −L)/g(t−L) is guaranteed to lie between zero
        /// and one, the intermediate values of g(ti − L) could become very large. For
        /// polynomial functions, these values should not grow too large, and should be
        /// effectively represented in practice by floating point values without loss of
        /// precision. For exponential functions, these values could grow quite large as
        /// new values of (ti − L) become large, and potentially exceed the capacity of
        /// common floating point types. However, since the values stored by the
        /// algorithms are linear combinations of g values (scaled sums), they can be
        /// rescaled relative to a new landmark. That is, by the analysis of exponential
        /// decay in Section III-A, the choice of L does not affect the final result. We
        /// can therefore multiply each value based on L by a factor of exp(−α(L′ − L)),
        /// and obtain the correct value as if we had instead computed relative to a new
        /// landmark L′ (and then use this new L′ at query time). This can be done with
        /// a linear pass over whatever data structure is being used."
        /// </summary>
        /// <param name="now"></param>
        /// <param name="next"></param>
        private void Rescale(long now, long next)
        {
            if (_nextScaleTime.CompareAndSet(next, now + RESCALE_THRESHOLD))
            {
                lockForRescale();
                try
                {
                    var oldStartTime = _startTime;
                    _startTime = CurrentTimeInSeconds();
                    double scalingFactor = Math.Exp(-_alpha * (_startTime - oldStartTime));

                    var keys = new List<double>(_values.Keys);
                    foreach (double key in keys)
                    {
                        WeightedSample sample = null;
                        if (_values.TryRemove(key, out sample))
                        {
                            WeightedSample newSample = new WeightedSample(sample.value, sample.weight * scalingFactor);
                            _values.AddOrUpdate(key * scalingFactor, newSample, (k, v) => v);
                        }
                    }
                }
                finally
                {
                    unlockForRescale();
                }
            }
        }
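The quoted passage is the justification for the Rescale method above: multiplying every stored weight by exp(−α(L′ − L)) re-expresses it relative to the new landmark L′ without changing the ratio between any two samples, so query-time results are unaffected. A minimal, self-contained sketch of that identity follows; the alpha, timestamps and landmarks are illustrative values only, not taken from the code above:

        // Rescaling to a new landmark preserves the relative weight of any two samples.
        double alpha = 0.015;                         // illustrative decay factor
        double L1 = 0, L2 = 600;                      // old and new landmarks, in seconds
        double t1 = 100, t2 = 500;                    // two sample timestamps

        double w1 = Math.Exp(alpha * (t1 - L1));      // weights relative to the old landmark
        double w2 = Math.Exp(alpha * (t2 - L1));

        double factor = Math.Exp(-alpha * (L2 - L1)); // the scalingFactor computed in Rescale
        double w1New = w1 * factor;                   // equals Math.Exp(alpha * (t1 - L2))
        double w2New = w2 * factor;

        Console.WriteLine(w1 / w2);                   // ratio before rescaling...
        Console.WriteLine(w1New / w2New);             // ...and after: the same value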