/// <summary>
/// Regression AdaBoost learner using the R2 algorithm.
/// Uses weighted sampling to target the observations with the largest error and
/// a weighted median to ensemble the models.
/// </summary>
/// <param name="iterations">Number of iterations (models) to boost. Must be at least 1</param>
/// <param name="learningRate">How much each boost iteration should add.
/// Must be larger than 0.0 and at most 1.0</param>
/// <param name="maximumTreeDepth">The maximum depth of the tree models. Must not be negative.
/// 0 will set the depth to default 3</param>
/// <param name="loss">Type of loss used when boosting weights. Linear is default</param>
/// <param name="minimumSplitSize">Minimum node split size in the trees. Must be larger than 0. 1 is default</param>
/// <param name="minimumInformationGain">The minimum improvement in information gain before a split is made.
/// Must be larger than 0</param>
/// <param name="seed">Seed for the random sampling</param>
/// <exception cref="ArgumentException">Thrown when an argument is outside its valid range,
/// including NaN for the floating point arguments</exception>
public RegressionAdaBoostLearner(int iterations = 50, double learningRate = 1, int maximumTreeDepth = 0,
    AdaBoostRegressionLoss loss = AdaBoostRegressionLoss.Linear, int minimumSplitSize = 1,
    double minimumInformationGain = 0.000001, int seed = 42)
{
    if (iterations < 1)
    {
        throw new ArgumentException("Iterations must be at least 1");
    }

    // Written as a negated "is valid" test so that double.NaN, for which every
    // comparison is false, is rejected instead of silently accepted.
    if (!(learningRate > 0.0 && learningRate <= 1.0))
    {
        throw new ArgumentException("learningRate must be larger than zero and at most 1.0");
    }

    if (minimumSplitSize <= 0)
    {
        throw new ArgumentException("minimum split size must be larger than 0");
    }

    // Zero is a legal value here (it is the default), only negative depths are rejected.
    if (maximumTreeDepth < 0)
    {
        throw new ArgumentException("maximum tree depth must be at least 0");
    }

    // Negated form for the same NaN reason as learningRate above.
    if (!(minimumInformationGain > 0.0))
    {
        throw new ArgumentException("minimum information gain must be larger than 0");
    }

    m_iterations = iterations;
    m_learningRate = learningRate;
    m_minimumSplitSize = minimumSplitSize;
    m_maximumTreeDepth = maximumTreeDepth;
    m_loss = loss;
    m_minimumInformationGain = minimumInformationGain;

    m_sampler = new WeightedRandomSampler(seed);
}
/// <summary>
/// Creates an approximate quantile finder backed by <paramref name="b"/> buffers
/// that each hold <paramref name="k"/> elements.
/// </summary>
/// <param name="b">the number of buffers</param>
/// <param name="k">the number of elements per buffer</param>
/// <param name="h">the tree height at which sampling shall start.</param>
/// <param name="precomputeEpsilon">the epsilon for which quantiles shall be precomputed;
/// set this value &lt;= 0.0 if nothing shall be precomputed.</param>
/// <param name="generator">a uniform random number generator.</param>
public UnknownDoubleQuantileEstimator(int b, int k, int h, double precomputeEpsilon, RandomEngine generator)
{
    // The sampler is created with an initial weight argument of 1 and the supplied generator.
    sampler = new WeightedRandomSampler(1, generator);

    // Buffer configuration happens before the remaining fields are stored;
    // the original statement order is kept because SetUp and Clear are opaque here.
    SetUp(b, k);

    treeHeightStartingSampling = h;
    this.precomputeEpsilon = precomputeEpsilon;

    Clear();
}
// Samples ten indices where the upper half carries ten times the weight of the lower half
// and pins the exact indices produced by a default-constructed sampler.
public void WeightedRandomSampler_Sample_Weight_10()
{
    // Arrange
    var sampler = new WeightedRandomSampler();
    var sourceIndices = Enumerable.Range(0, 10).ToArray();
    var sampleWeights = new[] { 1.0, 1.0, 1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0 };
    var sampled = new int[sourceIndices.Length];

    // Act
    sampler.Sample(sourceIndices, sampleWeights, sampled);

    // Assert
    var expectedIndices = new[] { 2, 5, 6, 7, 7, 8, 8, 8, 9, 9 };
    CollectionAssert.AreEqual(expectedIndices, sampled);
}