/// <summary>
        /// Regression AdaBoost learner using the R2 algorithm
        /// using weighted sampling to target the observations with largest error and
        /// weighted median to ensemble the models.
        /// </summary>
        /// <param name="iterations">Number of iterations (models) to boost</param>
        /// <param name="learningRate">How much each boost iteration should add. Must be in (0.0, 1.0]</param>
        /// <param name="maximumTreeDepth">The maximum depth of the tree models.
        /// 0 will set the depth to default 3</param>
        /// <param name="loss">Type of loss used when boosting weights. Linear is default</param>
        /// <param name="minimumSplitSize">minimum node split size in the trees 1 is default</param>
        /// <param name="minimumInformationGain">The minimum improvement in information gain before a split is made</param>
        /// <param name="seed">Seed for the random sampling</param>
        /// <exception cref="ArgumentException">If any argument is outside its valid range</exception>
        public RegressionAdaBoostLearner(int iterations = 50, double learningRate = 1, int maximumTreeDepth = 0,
                                         AdaBoostRegressionLoss loss = AdaBoostRegressionLoss.Linear, int minimumSplitSize = 1, double minimumInformationGain = 0.000001, int seed = 42)
        {
            // Guard clauses: fail fast with the offending parameter name attached.
            if (iterations < 1)
            {
                throw new ArgumentException("Iterations must be at least 1", nameof(iterations));
            }
            // Exactly 1.0 is allowed (full contribution per iteration); zero or negative is not.
            if (learningRate > 1.0 || learningRate <= 0)
            {
                throw new ArgumentException("learningRate must be larger than zero and at most 1.0", nameof(learningRate));
            }
            if (minimumSplitSize <= 0)
            {
                throw new ArgumentException("minimum split size must be larger than 0", nameof(minimumSplitSize));
            }
            // 0 is valid: it selects the default depth of 3 (see <param> doc above).
            if (maximumTreeDepth < 0)
            {
                throw new ArgumentException("maximum tree depth must be at least 0 (0 selects the default depth)", nameof(maximumTreeDepth));
            }
            if (minimumInformationGain <= 0)
            {
                throw new ArgumentException("minimum information gain must be larger than 0", nameof(minimumInformationGain));
            }

            m_iterations   = iterations;
            m_learningRate = learningRate;

            m_minimumSplitSize       = minimumSplitSize;
            m_maximumTreeDepth       = maximumTreeDepth;
            m_loss                   = loss;
            m_minimumInformationGain = minimumInformationGain;

            // Seeded sampler so repeated runs with the same seed are reproducible.
            m_sampler = new WeightedRandomSampler(seed);
        }
 /// <summary>
 /// Constructs an approximate quantile finder with b buffers, each having k elements.
 /// </summary>
 /// <param name="b">the number of buffers</param>
 /// <param name="k">the number of elements per buffer</param>
 /// <param name="h">the tree height at which sampling shall start.</param>
 /// <param name="precomputeEpsilon">the epsilon for which quantiles shall be precomputed; set this value &lt;= 0.0 if nothing shall be precomputed.</param>
 /// <param name="generator">a uniform random number generator.</param>
 public UnknownDoubleQuantileEstimator(int b, int k, int h, double precomputeEpsilon, RandomEngine generator)
 {
     // Sampler with weight 1 driven by the caller-supplied RNG.
     this.sampler = new WeightedRandomSampler(1, generator);
     // NOTE(review): SetUp presumably allocates the b buffers of k elements — confirm;
     // its side effects are defined elsewhere, so call order here must be preserved.
     SetUp(b, k);
     this.treeHeightStartingSampling = h;
     this.precomputeEpsilon          = precomputeEpsilon;
     // Reset to an empty state after setup; Clear's behavior is defined elsewhere.
     this.Clear();
 }
        // NOTE(review): the lines "Exemplo n.º 3" and "0" below this point in the original
        // were scraper artifacts from a code-example site (example label + vote count), not C#;
        // the following test method presumably carried a [Test]/[Fact] attribute here.
        /// <summary>
        /// Verifies that sampling is biased toward the heavier weights:
        /// indices 5-9 (weight 10) dominate the drawn sample over indices 0-4 (weight 1).
        /// </summary>
        public void WeightedRandomSampler_Sample_Weight_10()
        {
            // Arrange: ten candidate indices, the last five weighted 10x heavier.
            var sampler = new WeightedRandomSampler();
            var indices = Enumerable.Range(0, 10).ToArray();
            var weights = Enumerable.Repeat(1.0, 5)
                .Concat(Enumerable.Repeat(10.0, 5))
                .ToArray();

            // Act: draw as many samples as there are indices.
            var sampled = new int[indices.Length];
            sampler.Sample(indices, weights, sampled);

            // Assert: exact sequence is reproducible — assumes the parameterless
            // sampler uses a fixed default seed (TODO confirm against its ctor).
            var expected = new int[] { 2, 5, 6, 7, 7, 8, 8, 8, 9, 9 };
            CollectionAssert.AreEqual(expected, sampled);
        }