示例#1
0
 /// <summary>
 /// Creates the actor and pre-calculates both its estimator and its Bloom filter
 /// by adding every item of <paramref name="dataSet"/> to each of them.
 /// </summary>
 /// <param name="dataSet">The data set for this actor</param>
 /// <param name="hybridEstimatorFactory">Factory for creating estimators</param>
 /// <param name="bloomFilterFactory">Factory for creating Bloom filters</param>
 /// <param name="configuration">Bloom filter configuration to use</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="dataSet"/>, <paramref name="hybridEstimatorFactory"/> or
 /// <paramref name="bloomFilterFactory"/> is <c>null</c>.
 /// </exception>
 public PrecalculatedActor(IList <TestEntity> dataSet,
                           IHybridEstimatorFactory hybridEstimatorFactory,
                           IInvertibleBloomFilterFactory bloomFilterFactory,
                           IInvertibleBloomFilterConfiguration <TestEntity, long, int, TCount> configuration)
 {
     // Fail fast: without these guards a null factory would only surface as a
     // NullReferenceException after the whole data set was already processed.
     if (dataSet == null)
     {
         throw new System.ArgumentNullException(nameof(dataSet));
     }
     if (hybridEstimatorFactory == null)
     {
         throw new System.ArgumentNullException(nameof(hybridEstimatorFactory));
     }
     if (bloomFilterFactory == null)
     {
         throw new System.ArgumentNullException(nameof(bloomFilterFactory));
     }

     _protobufModel = TypeModel.Create();
     _protobufModel.UseImplicitZeroDefaults = true;
     _hybridEstimatorFactory = hybridEstimatorFactory;
     _configuration          = configuration;

     // The estimator is deliberately (and heavily) oversized.
     _estimator = _hybridEstimatorFactory.Create(_configuration, 100000);
     foreach (var entity in dataSet)
     {
         _estimator.Add(entity);
     }

     // The filter is sized to the number of differences it can handle,
     // not to the size of the data.
     // NOTE(review): 0.001F is presumably the target error rate - confirm against the factory.
     _filter = bloomFilterFactory.Create(_configuration, 5000, 0.001F, true);
     foreach (var entity in dataSet)
     {
         _filter.Add(entity);
     }
 }
        /// <summary>
        /// Measures hybrid estimator duration and estimate accuracy across a grid of data sizes,
        /// error percentages, capacities and strata counts. One CSV file is written per
        /// data size / error percentage combination, with one row per capacity / strata pair.
        /// </summary>
        /// <remarks>
        /// Every row aggregates a fixed number of runs. In each run an estimator is filled with
        /// generated data, the data is modified, a second estimator is filled with the modified
        /// data, and the first estimator is decoded against the second. A failed decode
        /// (no value returned) is counted as an estimate of zero.
        /// </remarks>
        public void HybridEstimatorPerformanceMeasurement()
        {
            // Number of repetitions aggregated into each CSV row.
            const int runCount = 50;
            // Rows are machine-readable: always format numbers culture-independently so that a
            // decimal comma can never be mistaken for a column separator.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var configuration = new LargeBloomFilterConfiguration();
            var testSizes     = new [] { 5000, 10000, 100000, 500000 };
            // Error sizes are percentages of the data size (see modCount below).
            var errorSizes    = new [] { 1, 5, 10, 20, 50, 75, 100 };
            var capacities    = new long[] { 80, 250, 1000 };
            var stratas       = new byte[] { 3, 7, 9, 13, 19, 25, 32 };

            foreach (var dataSize in testSizes)
            {
                foreach (var errorSize in errorSizes)
                {
                    using (
                        var writer =
                            new StreamWriter(File.Open($"hybridestimator-{dataSize}-{errorSize}.csv",
                                                       FileMode.Create)))
                    {
                        writer.WriteLine(
                            "duration,dataSize,strata,capacity,modCount,estimatedModCount,countDiff,countDiffSd,decodeSuccessRate");
                        foreach (var capacity in capacities)
                        {
                            foreach (var strata in stratas)
                            {
                                var timeSpanAggregate       = new TimeSpan[runCount];
                                var countAggregate          = new int[runCount];
                                var modCountResultAggregate = new int[runCount];
                                var decodeResult            = new int[runCount];

                                for (var run = 0; run < runCount; run++)
                                {
                                    // Data generation is intentionally kept outside the timed section.
                                    var testData = DataGenerator.Generate().Take(dataSize).ToList();
                                    var modCount = (int)(dataSize / 100.0D * errorSize);

                                    // Stopwatch is monotonic and high resolution, unlike wall-clock deltas.
                                    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                                    var estimator1 = new HybridEstimator<TestEntity, long, int>(capacity, strata, configuration);
                                    estimator1.Initialize((uint)testData.Count, 2, 10);
                                    foreach (var item in testData)
                                    {
                                        estimator1.Add(item);
                                    }
                                    testData.Modify(modCount);
                                    var estimator2 = new HybridEstimator<TestEntity, long, int>(capacity, strata, configuration);
                                    estimator2.Initialize((uint)testData.Count, 2, 10);
                                    foreach (var item in testData)
                                    {
                                        estimator2.Add(item);
                                    }
                                    var measuredModCount = estimator1.Decode(estimator2);
                                    stopwatch.Stop();

                                    timeSpanAggregate[run]       = stopwatch.Elapsed;
                                    countAggregate[run]          = modCount;
                                    // A failed decode yields no value; record it as zero and flag the failure.
                                    modCountResultAggregate[run] = (int)(measuredModCount ?? 0L);
                                    decodeResult[run]            = measuredModCount.HasValue ? 1 : 0;
                                }

                                var timeAvg          = new TimeSpan((long)timeSpanAggregate.Select(t => t.Ticks).Average());
                                var countAvg         = (long)countAggregate.Average();
                                var modCountResult   = (long)modCountResultAggregate.Average();
                                // Per-run difference between the estimated and the actual modification count.
                                var differenceResult =
                                    modCountResultAggregate.Select((r, i) => r - countAggregate[i]).ToArray();
                                var differenceSd      = Math.Sqrt(differenceResult.Variance());
                                var decodeSuccessRate = 1.0D * decodeResult.Sum() / runCount;

                                writer.WriteLine(
                                    string.Format(
                                        invariant,
                                        "{0},{1},{2},{3},{4},{5},{6},{7},{8}",
                                        timeAvg.TotalMilliseconds,
                                        dataSize,
                                        strata,
                                        capacity,
                                        countAvg,
                                        modCountResult,
                                        (long)differenceResult.Average(),
                                        differenceSd,
                                        decodeSuccessRate));
                            }
                        }
                    }
                }
            }
        }