Exemplo n.º 1
0
        /// <summary>
        /// Verifies that every regression tree variant fits the Boston housing
        /// data with a low mean squared error, both with all features available
        /// and with the feature subset restricted to six.
        /// </summary>
        public void TestBoston()
        {
            foreach (var name in RegTrees)
            {
                foreach (var criterion in REG_CRITERIONS)
                {
                    // Full feature set: expect a very tight fit on the training data.
                    var regressor = CreateRegressor(name, criterion: criterion, random: new Random(0));
                    regressor.Fit(boston.Data, boston.Target);
                    var mse = Sharpkit.Learn.Metrics.Metrics.MeanSquaredError(
                        boston.Target, regressor.Predict(boston.Data).Column(0));
                    Assert.IsTrue(mse < 1,
                                  "Failed with {0}, criterion = {1} and score = {2}".Frmt(name, criterion, mse));

                    // using fewer features reduces the learning ability of this tree,
                    // but reduces training time.
                    regressor = CreateRegressor(name, criterion: criterion, max_features: MaxFeaturesChoice.Value(6),
                                                random: new Random(0));
                    regressor.Fit(boston.Data, boston.Target);
                    mse = Sharpkit.Learn.Metrics.Metrics.MeanSquaredError(
                        boston.Target, regressor.Predict(boston.Data).Column(0));
                    Assert.IsTrue(mse < 2,
                                  "Failed with {0}, criterion = {1} and score = {2}".Frmt(name, criterion, mse));
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates the regression tree variant identified by <paramref name="name"/>.
        /// </summary>
        /// <param name="name">One of "DecisionTreeRegressor", "Presort-DecisionTreeRegressor"
        /// or "ExtraTreeRegressor".</param>
        /// <param name="criterion">The function to measure the quality of a split.</param>
        /// <param name="max_depth">Maximum depth of the tree; <c>null</c> leaves it unbounded.</param>
        /// <param name="min_samples_split">Minimum number of samples required to split an internal node.</param>
        /// <param name="min_samples_leaf">Minimum number of samples required at a leaf node.</param>
        /// <param name="max_features">Number of features to consider per split; <c>null</c> uses the estimator's default.</param>
        /// <param name="random">Random number generator; <c>null</c> uses the estimator's default.</param>
        /// <returns>A freshly constructed, unfitted regressor.</returns>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="name"/> is not recognized.</exception>
        public DecisionTreeRegressor CreateRegressor(
            string name,
            Criterion criterion            = Criterion.Mse,
            int?max_depth                  = null,
            int min_samples_split          = 2,
            int min_samples_leaf           = 1,
            MaxFeaturesChoice max_features = null,
            Random random                  = null
            )
        {
            switch (name)
            {
            case "DecisionTreeRegressor":
                return(new DecisionTreeRegressor(criterion, Splitter.Best, max_depth, min_samples_split, min_samples_leaf,
                                                 max_features, random));

            case "Presort-DecisionTreeRegressor":
                return(new DecisionTreeRegressor(criterion, Splitter.PresortBest, max_depth, min_samples_split,
                                                 min_samples_leaf, max_features, random));

            case "ExtraTreeRegressor":
                return(new ExtraTreeRegressor(criterion, Splitter.Random, max_depth, min_samples_split, min_samples_leaf,
                                              max_features, random));
            }

            // Include the offending name so a typo in a test's name list is easy to spot.
            throw new InvalidOperationException("Unexpected name: " + name);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Checks that every classification tree reproduces the toy dataset
        /// exactly, both with all features and with max_features restricted to one.
        /// </summary>
        public void TestClassificationToy()
        {
            foreach (var name in CLF_TREES)
            {
                // All features available.
                var classifier = CreateClassifier <double>(name, random: new Random(0));
                classifier.Fit(X, y);
                AssertExt.ArrayEqual(classifier.Predict(T), trueResult, "Failed with {0}".Frmt(name));

                // A single feature per split should still fit the toy data exactly.
                classifier = CreateClassifier <double>(name, max_features: MaxFeaturesChoice.Value(1), random: new Random(1));
                classifier.Fit(X, y);
                AssertExt.ArrayEqual(classifier.Predict(T), trueResult, "Failed with {0}".Frmt(name));
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Exercises the error paths of the classification trees: predicting
        /// before fitting, invalid hyper-parameter values, and mismatched
        /// input shapes.
        /// </summary>
        public void TestError()
        {
            foreach (var name in CLF_TREES)
            {
                // Predicting probabilities before Fit must fail.
                var tree = CreateClassifier <double>(name);
                AssertExt.Raises <InvalidOperationException>(() => tree.PredictProba(X));

                tree.Fit(X, y);
                // wrong feature shape for sample
                var badSample = new double[] { -2, -1, 1 }.ToColumnMatrix();
                AssertExt.Raises <ArgumentException>(() => tree.PredictProba(badSample));
            }

            foreach (var name in CLF_TREES)
            {
                // Invalid hyper-parameter values must be rejected at Fit time.
                AssertExt.Raises <ArgumentException>(() => CreateClassifier <double>(name, min_samples_leaf: -1).Fit(X, y));
                AssertExt.Raises <ArgumentException>(() => CreateClassifier <double>(name, min_samples_split: -1).Fit(X, y));
                AssertExt.Raises <ArgumentException>(() => CreateClassifier <double>(name, max_depth: -1).Fit(X, y));
                AssertExt.Raises <ArgumentException>(
                    () => CreateClassifier <double>(name, max_features: MaxFeaturesChoice.Value(42)).Fit(X, y));

                // Target vector shorter than the number of samples.
                var tree = CreateClassifier <double>(name);
                var shortTarget = y.Subarray(0, y.Length - 1);
                AssertExt.Raises <ArgumentException>(() => tree.Fit(X, shortTarget));

                // Plain Predict before fitting must also fail.
                tree = CreateClassifier <double>(name);
                AssertExt.Raises <InvalidOperationException>(() => tree.Predict(T));

                // Predicting on input with a different number of columns.
                tree.Fit(X, y);
                AssertExt.Raises <ArgumentException>(
                    () => tree.Predict(T.Subarray(0, T.GetLength(0), 1, T.GetLength(1) - 1)));

                // Fit on a square matrix (X * X^T), then predict on X: wrong sample shape.
                tree = CreateClassifier <double>(name);
                tree.Fit(X.ToDenseMatrix() * X.ToDenseMatrix().Transpose(), y);
                AssertExt.Raises <ArgumentException>(() => tree.Predict(X));

                // Predicting on the transpose of the training data must fail too.
                tree = CreateClassifier <double>(name);
                tree.Fit(X, y);
                AssertExt.Raises <ArgumentException>(() => tree.Predict(X.ToDenseMatrix().Transpose()));
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Checks that every regression tree reproduces the toy dataset exactly,
        /// both with all features and with max_features restricted to one.
        /// </summary>
        public void TestRegressionToy()
        {
            foreach (var name in RegTrees)
            {
                var reg = CreateRegressor(name, random: new Random(1));
                reg.Fit(X, y);
                AssertExt.ArrayEqual(reg.PredictSingle(T), trueResult,
                                     "Failed with {0}".Frmt(name));

                // Restricting max_features should still fit the toy data perfectly.
                var clf = CreateRegressor(name, max_features: MaxFeaturesChoice.Value(1), random: new Random(1));
                clf.Fit(X, y);
                // BUG FIX: the original asserted on 'reg' here, so the max_features
                // variant ('clf') was fitted but never actually verified.
                AssertExt.AlmostEqual(clf.PredictSingle(T), trueResult,
                                      "Failed with {0}".Frmt(name));
            }
        }
 /// <summary>
 /// Initializes a new instance of the ExtraTreeRegressor class.
 /// </summary>
 /// <param name="criterion"> The function to measure the quality of a split. The only supported
 /// criterion is <see cref="Criterion.Mse"/> for the mean squared error.</param>
 /// <param name="splitter">The strategy used to choose the split at each node. Supported
 /// strategies are <see cref="Splitter.Best"/> to choose the best split and <see cref="Splitter.Random"/> to choose
 /// the best random split.</param>
 /// <param name="maxDepth">The maximum depth of the tree. If <c>null</c>, then nodes are expanded until
 /// all leaves are pure or until all leaves contain less than
 /// <paramref name="minSamplesSplit"/> samples.</param>
 /// <param name="minSamplesSplit">The minimum number of samples required to split an internal node.</param>
 /// <param name="minSamplesLeaf">The minimum number of samples required to be at a leaf node.</param>
 /// <param name="maxFeatures">Number of features to consider when looking for the best split. If <c>null</c>,
 /// <see cref="MaxFeaturesChoice.Auto"/> is substituted (presumably meaning all features for a
 /// regressor — confirm against the base-class handling of Auto).</param>
 /// <param name="random">random number generator</param>
 public ExtraTreeRegressor(
     Criterion criterion = Criterion.Mse,
     Splitter splitter = Splitter.Random,
     int? maxDepth = null,
     int minSamplesSplit = 2,
     int minSamplesLeaf = 1,
     MaxFeaturesChoice maxFeatures = null,
     Random random = null) : base(criterion,
                                  splitter,
                                  maxDepth,
                                  minSamplesSplit,
                                  minSamplesLeaf,
                                  maxFeatures ?? MaxFeaturesChoice.Auto(),
                                  random)
 {
 }
Exemplo n.º 7
0
        /// <summary>
        /// Checks the probability outputs of the classification trees on iris:
        /// each probability row sums to one, the argmax matches Predict, and
        /// exponentiating the log-probabilities recovers the probabilities.
        /// </summary>
        public void TestProbability()
        {
            foreach (var name in CLF_TREES)
            {
                var classifier = CreateClassifier <int>(name, max_depth: 1, max_features: MaxFeaturesChoice.Value(1),
                                                        random: new Random(42));
                classifier.Fit(iris.Data, iris.Target);

                var probabilities = classifier.PredictProba(iris.Data);

                // Each row of class probabilities must sum to one.
                AssertExt.AlmostEqual(probabilities.SumOfEveryRow().ToArray(),
                                      DenseVector.Create(iris.Data.RowCount, i => 1.0).ToArray(),
                                      "Failed with {0}".Frmt(name));

                // The most probable class must agree with Predict.
                AssertExt.ArrayEqual(probabilities.ArgmaxColumns(),
                                     classifier.Predict(iris.Data),
                                     "Failed with {0}".Frmt(name));

                // exp(log-proba) must recover the probabilities (within tolerance).
                AssertExt.AlmostEqual(classifier.PredictProba(iris.Data),
                                      classifier.PredictLogProba(iris.Data).Exp(),
                                      "Failed with {0}".Frmt(name), 10E-8);
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Verifies how each MaxFeaturesChoice option resolves to a concrete
        /// feature count after fitting, and that invalid choices are rejected.
        /// </summary>
        public void TestMaxFeatures()
        {
            // Auto on a regressor resolves to the full feature count.
            foreach (var name in RegTrees)
            {
                var regressor = CreateRegressor(name, max_features: MaxFeaturesChoice.Auto());
                regressor.Fit(boston.Data, boston.Target);
                Assert.AreEqual(boston.Data.ColumnCount, regressor.MaxFeaturesValue);
            }

            // Auto on a classifier resolves to 2 on the iris data.
            foreach (var name in CLF_TREES)
            {
                var classifier = CreateClassifier <int>(name, max_features: MaxFeaturesChoice.Auto());
                classifier.Fit(iris.Data, iris.Target);
                Assert.AreEqual(2, classifier.MaxFeaturesValue);
            }

            foreach (var name in CLF_TREES)
            {
                // Sqrt of the feature count.
                var classifier = CreateClassifier <int>(name, max_features: MaxFeaturesChoice.Sqrt());
                classifier.Fit(iris.Data, iris.Target);
                Assert.AreEqual(Math.Sqrt(iris.Data.ColumnCount), classifier.MaxFeaturesValue);

                // Log2 of the feature count.
                classifier = CreateClassifier <int>(name, max_features: MaxFeaturesChoice.Log2());
                classifier.Fit(iris.Data, iris.Target);
                Assert.AreEqual(Math.Log(iris.Data.ColumnCount, 2), classifier.MaxFeaturesValue);

                // Explicit values pass through unchanged.
                classifier = CreateClassifier <int>(name, max_features: MaxFeaturesChoice.Value(1));
                classifier.Fit(iris.Data, iris.Target);
                Assert.AreEqual(1, classifier.MaxFeaturesValue);

                classifier = CreateClassifier <int>(name, max_features: MaxFeaturesChoice.Value(3));
                classifier.Fit(iris.Data, iris.Target);
                Assert.AreEqual(3, classifier.MaxFeaturesValue);

                // Fractions are taken of the feature count, truncated to int.
                classifier = CreateClassifier <int>(name, max_features: MaxFeaturesChoice.Fraction(0.5));
                classifier.Fit(iris.Data, iris.Target);
                Assert.AreEqual((int)(0.5 * iris.Data.ColumnCount), classifier.MaxFeaturesValue);

                classifier = CreateClassifier <int>(name, max_features: MaxFeaturesChoice.Fraction(1.0));
                classifier.Fit(iris.Data, iris.Target);
                Assert.AreEqual(iris.Data.ColumnCount, classifier.MaxFeaturesValue);

                // Invalid choices must raise at Fit time.
                var intTargets = y.Select(v => (int)v).ToArray();
                classifier = CreateClassifier <int>(name, max_features: MaxFeaturesChoice.Value(10));
                AssertExt.Raises <ArgumentException>(() => classifier.Fit(X, intTargets));

                classifier = CreateClassifier <int>(name, max_features: MaxFeaturesChoice.Value(-1));
                AssertExt.Raises <ArgumentException>(() => classifier.Fit(X, intTargets));

                classifier = CreateClassifier <int>(name, max_features: MaxFeaturesChoice.Fraction(0.0));
                AssertExt.Raises <ArgumentException>(() => classifier.Fit(X, intTargets));

                classifier = CreateClassifier <int>(name, max_features: MaxFeaturesChoice.Fraction(1.5));
                AssertExt.Raises <ArgumentException>(() => classifier.Fit(X, intTargets));
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Checks that every classification tree scores above 0.9 on the iris
        /// training data for every criterion, with all features and with
        /// max_features restricted to two.
        /// </summary>
        public void TestIris()
        {
            foreach (var name in CLF_TREES)
            {
                foreach (var criterion in CLF_CRITERIONS)
                {
                    // All features available: expect a high training score.
                    var classifier = CreateClassifier <int>(name, criterion: criterion, random: new Random(0));
                    classifier.Fit(iris.Data, iris.Target);
                    var accuracy = classifier.Score(iris.Data, iris.Target);
                    Assert.IsTrue(accuracy > 0.9,
                                  "Failed with {0}, criterion = {1} and score = {2}".Frmt(name, criterion, accuracy));

                    // Restricting to two features should still score well.
                    classifier = CreateClassifier <int>(name, criterion: criterion, max_features: MaxFeaturesChoice.Value(2),
                                                        random: new Random(0));
                    classifier.Fit(iris.Data, iris.Target);
                    accuracy = classifier.Score(iris.Data, iris.Target);
                    Assert.IsTrue(accuracy > 0.9,
                                  "Failed with {0}, criterion = {1} and score = {2}".Frmt(name, criterion, accuracy));
                }
            }
        }