        public void GradientBoostingTestTowerRelativeError()
        {
            var gbt        = new GradientBoostedTreesAlgorithm();
            var provider   = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
            var instance   = provider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
            var regProblem = new RegressionProblem();

            regProblem.Load(provider.LoadData(instance));

            #region Algorithm Configuration
            gbt.Problem         = regProblem;
            gbt.Seed            = 0;
            gbt.SetSeedRandomly = false;
            gbt.Iterations      = 3000;
            gbt.MaxSize         = 20;
            gbt.Nu              = 0.005;
            gbt.LossFunctionParameter.Value = gbt.LossFunctionParameter.ValidValues.First(l => l.ToString().Contains("Relative"));
            gbt.CreateSolution = false;
            #endregion

            gbt.Start();

            Console.WriteLine(gbt.ExecutionTime);
            Assert.AreEqual(0.061954221604374943, ((DoubleValue)gbt.Results["Loss (train)"].Value).Value, 1E-6);
            Assert.AreEqual(0.06316303473499961, ((DoubleValue)gbt.Results["Loss (test)"].Value).Value, 1E-6);
        }
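
        // Serializes a single boosted regression tree with the HeuristicLab XML persistence layer
        // and checks that the restored tree has the same string representation as the original.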
        public void TestDecisionTreePersistence()
        {
            var provider   = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
            var instance   = provider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
            var regProblem = new RegressionProblem();

            regProblem.Load(provider.LoadData(instance));
            var problemData = regProblem.ProblemData;
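            // build the boosting state directly via the static API
            // (r = fraction of rows, m = fraction of variables sampled per tree, nu = learning rate)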
            var state       = GradientBoostedTreesAlgorithmStatic.CreateGbmState(problemData, new SquaredErrorLoss(), randSeed: 31415, maxSize: 100, r: 0.5, m: 1, nu: 1);

            GradientBoostedTreesAlgorithmStatic.MakeStep(state);

            var model   = ((IGradientBoostedTreesModel)state.GetModel());
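            // Models[0] is the initial constant model, so Skip(1) yields the first regression tree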
            var treeM   = model.Models.Skip(1).First();
            var origStr = treeM.ToString();

            using (var memStream = new MemoryStream()) {
                Persistence.Default.Xml.XmlGenerator.Serialize(treeM, memStream);
                var buf = memStream.GetBuffer();
                using (var restoreStream = new MemoryStream(buf)) {
                    var restoredTree = Persistence.Default.Xml.XmlParser.Deserialize(restoreStream);
                    var restoredStr  = restoredTree.ToString();
                    Assert.AreEqual(origStr, restoredStr);
                }
            }
        }
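
        // Trains GBT on the Tower data set with the absolute error loss and compares the
        // resulting training and test losses against previously recorded reference values.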
        public void GradientBoostingTestTowerAbsoluteError()
        {
            var gbt        = new GradientBoostedTreesAlgorithm();
            var provider   = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
            var instance   = provider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
            var regProblem = new RegressionProblem();

            regProblem.Load(provider.LoadData(instance));

            #region Algorithm Configuration
            gbt.Problem         = regProblem;
            gbt.Seed            = 0;
            gbt.SetSeedRandomly = false;
            gbt.Iterations      = 1000;
            gbt.MaxSize         = 20;
            gbt.Nu              = 0.02;
            gbt.LossFunctionParameter.Value = gbt.LossFunctionParameter.ValidValues.First(l => l.ToString().Contains("Absolute"));
            gbt.ModelCreation = GradientBoostedTrees.ModelCreation.QualityOnly;
            #endregion

            gbt.Start();

            Console.WriteLine(gbt.ExecutionTime);
            Assert.AreEqual(10.551385044666661, ((DoubleValue)gbt.Results["Loss (train)"].Value).Value, 1E-6);
            Assert.AreEqual(12.918001745581172, ((DoubleValue)gbt.Results["Loss (test)"].Value).Value, 1E-6);
        }
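
        // Trains GBT on the Tower data set with the default squared error loss and compares the
        // resulting training and test losses against previously recorded reference values.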
        public void GradientBoostingTestTowerSquaredError()
        {
            var gbt        = new GradientBoostedTreesAlgorithm();
            var provider   = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
            var instance   = provider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
            var regProblem = new RegressionProblem();

            regProblem.Load(provider.LoadData(instance));

            #region Algorithm Configuration
            gbt.Problem         = regProblem;
            gbt.Seed            = 0;
            gbt.SetSeedRandomly = false;
            gbt.Iterations      = 5000;
            gbt.MaxSize         = 20;
            gbt.CreateSolution  = false;
            #endregion

            gbt.Start();

            Console.WriteLine(gbt.ExecutionTime);
            Assert.AreEqual(267.68704241153921, ((DoubleValue)gbt.Results["Loss (train)"].Value).Value, 1E-6);
            Assert.AreEqual(393.84704062205469, ((DoubleValue)gbt.Results["Loss (test)"].Value).Value, 1E-6);
        }
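
        // Trains a GBT model step by step, determines the most relevant input variable, and prints
        // a simple partial dependence table (variable value vs. estimated output) for inspection.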
        public void TestDecisionTreePartialDependence()
        {
            var provider   = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
            var instance   = provider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
            var regProblem = new RegressionProblem();

            regProblem.Load(provider.LoadData(instance));
            var problemData = regProblem.ProblemData;
            var state       = GradientBoostedTreesAlgorithmStatic.CreateGbmState(problemData, new SquaredErrorLoss(), randSeed: 31415, maxSize: 10, r: 0.5, m: 1, nu: 0.02);

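            // run 1000 boosting iterations through the static stepping API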
            for (int i = 0; i < 1000; i++)
            {
                GradientBoostedTreesAlgorithmStatic.MakeStep(state);
            }

            var mostImportantVar = state.GetVariableRelevance().OrderByDescending(kvp => kvp.Value).First();

            Console.WriteLine("var: {0} relevance: {1}", mostImportantVar.Key, mostImportantVar.Value);
            var model = ((IGradientBoostedTreesModel)state.GetModel());
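            // again skip the initial constant model and look at the first regression tree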
            var treeM = model.Models.Skip(1).First();

            Console.WriteLine(treeM.ToString());
            Console.WriteLine();

            var mostImportantVarValues = problemData.Dataset.GetDoubleValues(mostImportantVar.Key).OrderBy(x => x).ToArray();
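            // build a single-column dataset that sweeps the most relevant variable over its sorted observed values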
            var ds = new ModifiableDataset(new string[] { mostImportantVar.Key },
                                           new IList[] { mostImportantVarValues.ToList<double>() });

            var estValues = model.GetEstimatedValues(ds, Enumerable.Range(0, mostImportantVarValues.Length)).ToArray();

            for (int i = 0; i < mostImportantVarValues.Length; i += 10)
            {
                Console.WriteLine("{0,-5:N3} {1,-5:N3}", mostImportantVarValues[i], estValues[i]);
            }
        }