private GaussianProcessRegression CreateGaussianProcessRegressionSample() {
      // Builds a pre-configured Gaussian process regression sample for the
      // "Spatial co-evolution" benchmark regression instance.
      var gpr = new GaussianProcessRegression();
      var provider = new VariousInstanceProvider();
      // Single(predicate) instead of Where(...).Single(): same failure semantics
      // (throws if missing or ambiguous), consistent with the instance lookups
      // elsewhere in this file.
      var instance = provider.GetDataDescriptors().Single(x => x.Name.Contains("Spatial co-evolution"));
      var regProblem = new RegressionProblem();
      regProblem.Load(provider.LoadData(instance));

      #region Algorithm Configuration
      gpr.Name = "Gaussian Process Regression";
      gpr.Description = "A Gaussian process regression algorithm which solves the spatial co-evolution benchmark problem";
      gpr.Problem = regProblem;

      gpr.CovarianceFunction = new CovarianceSquaredExponentialIso();
      gpr.MeanFunction = new MeanConst();
      gpr.MinimizationIterations = 20;
      gpr.Seed = 0;
      gpr.SetSeedRandomly = true;
      #endregion

      gpr.Engine = new ParallelEngine.ParallelEngine();
      return gpr;
    }
        private GaussianProcessRegression CreateGaussianProcessRegressionSample()
        {
            // Builds a pre-configured Gaussian process regression sample for the
            // "Spatial co-evolution" benchmark regression instance.
            var gpr        = new GaussianProcessRegression();
            var provider   = new VariousInstanceProvider();
            // Single(predicate) instead of Where(...).Single(): same failure semantics
            // (throws if missing or ambiguous), consistent with the instance lookups
            // elsewhere in this file.
            var instance   = provider.GetDataDescriptors().Single(x => x.Name.Contains("Spatial co-evolution"));
            var regProblem = new RegressionProblem();

            regProblem.Load(provider.LoadData(instance));

            #region Algorithm Configuration
            gpr.Name        = "Gaussian Process Regression";
            gpr.Description = "A Gaussian process regression algorithm which solves the spatial co-evolution benchmark problem";
            gpr.Problem     = regProblem;

            gpr.CovarianceFunction     = new CovarianceSquaredExponentialIso();
            gpr.MeanFunction           = new MeanConst();
            gpr.MinimizationIterations = 20;
            gpr.Seed            = 0;
            gpr.SetSeedRandomly = true;
            #endregion

            gpr.Engine = new ParallelEngine.ParallelEngine();
            // "return(gpr)" read like a method call; plain return statement is idiomatic.
            return gpr;
        }
        public void GradientBoostingTestTowerSquaredError()
        {
            // Configures a gradient boosted trees run on the "Tower" real-world
            // regression instance and checks the training/test losses against
            // previously recorded reference values.
            var algorithm     = new GradientBoostedTreesAlgorithm();
            var dataProvider  = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
            var towerInstance = dataProvider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
            var problem       = new RegressionProblem();

            problem.Load(dataProvider.LoadData(towerInstance));

            #region Algorithm Configuration
            algorithm.Problem         = problem;
            algorithm.Seed            = 0;     // fixed seed for reproducible losses
            algorithm.SetSeedRandomly = false;
            algorithm.Iterations      = 5000;
            algorithm.MaxSize         = 20;
            algorithm.CreateSolution  = false; // only the loss results are needed
            #endregion

            algorithm.Start();

            Console.WriteLine(algorithm.ExecutionTime);
            Assert.AreEqual(267.68704241153921, ((DoubleValue)algorithm.Results["Loss (train)"].Value).Value, 1E-6);
            Assert.AreEqual(393.84704062205469, ((DoubleValue)algorithm.Results["Loss (test)"].Value).Value, 1E-6);
        }
        public void TestDecisionTreePartialDependence()
        {
            // Trains a small GBT ensemble on the "Tower" instance, then prints the
            // estimated values over the sorted values of the most relevant input
            // variable (a partial dependence trace).
            var dataProvider  = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
            var towerInstance = dataProvider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
            var problem       = new RegressionProblem();

            problem.Load(dataProvider.LoadData(towerInstance));
            var data     = problem.ProblemData;
            var gbmState = GradientBoostedTreesAlgorithmStatic.CreateGbmState(data, new SquaredErrorLoss(), randSeed: 31415, maxSize: 10, r: 0.5, m: 1, nu: 0.02);

            // Run 1000 boosting iterations.
            for (int iteration = 0; iteration < 1000; iteration++)
            {
                GradientBoostedTreesAlgorithmStatic.MakeStep(gbmState);
            }

            var mostImportantVar = gbmState.GetVariableRelevance().OrderByDescending(kvp => kvp.Value).First();

            Console.WriteLine("var: {0} relevance: {1}", mostImportantVar.Key, mostImportantVar.Value);
            var ensemble   = ((IGradientBoostedTreesModel)gbmState.GetModel());
            var secondTree = ensemble.Models.Skip(1).First();

            Console.WriteLine(secondTree.ToString());
            Console.WriteLine();

            // Sorted values of the most relevant variable become a single-column dataset.
            var sortedValues = data.Dataset.GetDoubleValues(mostImportantVar.Key).OrderBy(x => x).ToArray();
            var partialDs    = new ModifiableDataset(new string[] { mostImportantVar.Key },
                                                     new IList[] { sortedValues.ToList<double>() });

            var estimates = ensemble.GetEstimatedValues(partialDs, Enumerable.Range(0, sortedValues.Length)).ToArray();

            // Print every 10th (value, estimate) pair.
            for (int row = 0; row < sortedValues.Length; row += 10)
            {
                Console.WriteLine("{0,-5:N3} {1,-5:N3}", sortedValues[row], estimates[row]);
            }
        }
    public GradientBoostingRegressionAlgorithm() {
      // Sets up the default problem, the selectable base learners, and all
      // parameters of the gradient boosting meta-algorithm.
      // NOTE(review): parameter registration order and the Hidden flags are kept
      // as-is; both appear intentional (Hidden is set right after the parameter
      // is added).
      Problem = new RegressionProblem(); // default problem
      var osgp = CreateOSGP();
      // Base learners the user can choose from; osgp is passed below as the
      // default selection of the constrained-value parameter.
      var regressionAlgs = new ItemSet<IAlgorithm>(new IAlgorithm[] {
        new RandomForestRegression(),
        osgp,
      });
      foreach (var alg in regressionAlgs) alg.Prepare();


      // Core boosting parameters (iterations, seeding).
      Parameters.Add(new FixedValueParameter<IntValue>(IterationsParameterName,
        "Number of iterations", new IntValue(100)));
      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName,
        "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName,
        "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));
      // Learning rate and row/variable subsampling fractions (see descriptions
      // for the valid ranges).
      Parameters.Add(new FixedValueParameter<DoubleValue>(NuParameterName,
        "The learning rate nu when updating predictions in GBM (0 < nu <= 1)", new DoubleValue(0.5)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(RParameterName,
        "The fraction of rows that are sampled randomly for the base learner in each iteration (0 < r <= 1)",
        new DoubleValue(1)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(MParameterName,
        "The fraction of variables that are sampled randomly for the base learner in each iteration (0 < m <= 1)",
        new DoubleValue(0.5)));
      // Which base learner to run each iteration, and how to find its solution
      // in the learner's results.
      Parameters.Add(new ConstrainedValueParameter<IAlgorithm>(RegressionAlgorithmParameterName,
        "The regression algorithm to use as a base learner", regressionAlgs, osgp));
      Parameters.Add(new FixedValueParameter<StringValue>(RegressionAlgorithmSolutionResultParameterName,
        "The name of the solution produced by the regression algorithm", new StringValue("Solution")));
      Parameters[RegressionAlgorithmSolutionResultParameterName].Hidden = true;
      // Output/diagnostic switches; hidden from the default parameter view.
      Parameters.Add(new FixedValueParameter<BoolValue>(CreateSolutionParameterName,
        "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true)));
      Parameters[CreateSolutionParameterName].Hidden = true;
      Parameters.Add(new FixedValueParameter<BoolValue>(StoreRunsParameterName,
        "Flag that indicates if the results of the individual runs should be stored for detailed analysis", new BoolValue(false)));
      Parameters[StoreRunsParameterName].Hidden = true;
    }
    public void GradientBoostingTestTowerRelativeError() {
      // Gradient boosted trees with the relative-error loss on the "Tower"
      // instance must reproduce the recorded train/test loss values.
      var algorithm = new GradientBoostedTreesAlgorithm();
      var dataProvider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
      var towerInstance = dataProvider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
      var problem = new RegressionProblem();
      problem.Load(dataProvider.LoadData(towerInstance));

      #region Algorithm Configuration
      algorithm.Problem = problem;
      algorithm.Seed = 0;                // fixed seed for reproducible losses
      algorithm.SetSeedRandomly = false;
      algorithm.Iterations = 3000;
      algorithm.MaxSize = 20;
      algorithm.Nu = 0.005;
      // Select the relative-error loss from the algorithm's valid loss functions.
      algorithm.LossFunctionParameter.Value = algorithm.LossFunctionParameter.ValidValues.First(l => l.ToString().Contains("Relative"));
      algorithm.CreateSolution = false;  // only the loss results are needed
      #endregion

      RunAlgorithm(algorithm);

      Console.WriteLine(algorithm.ExecutionTime);
      Assert.AreEqual(0.061954221604374943, ((DoubleValue)algorithm.Results["Loss (train)"].Value).Value, 1E-6);
      Assert.AreEqual(0.06316303473499961, ((DoubleValue)algorithm.Results["Loss (test)"].Value).Value, 1E-6);
    }
    public void GradientBoostingTestTowerSquaredError() {
      // Gradient boosted trees with the default (squared-error) loss on the
      // "Tower" instance must reproduce the recorded train/test loss values.
      var algorithm = new GradientBoostedTreesAlgorithm();
      var dataProvider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
      var towerInstance = dataProvider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
      var problem = new RegressionProblem();
      problem.Load(dataProvider.LoadData(towerInstance));

      #region Algorithm Configuration
      algorithm.Problem = problem;
      algorithm.Seed = 0;                // fixed seed for reproducible losses
      algorithm.SetSeedRandomly = false;
      algorithm.Iterations = 5000;
      algorithm.MaxSize = 20;
      algorithm.CreateSolution = false;  // only the loss results are needed
      #endregion

      RunAlgorithm(algorithm);

      Console.WriteLine(algorithm.ExecutionTime);
      Assert.AreEqual(267.68704241153921, ((DoubleValue)algorithm.Results["Loss (train)"].Value).Value, 1E-6);
      Assert.AreEqual(393.84704062205469, ((DoubleValue)algorithm.Results["Loss (test)"].Value).Value, 1E-6);
    }
    public void TestDecisionTreePersistence() {
      // Round-trips a single boosted tree through XML serialization and checks
      // that its string representation is unchanged.
      var dataProvider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
      var towerInstance = dataProvider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
      var problem = new RegressionProblem();
      problem.Load(dataProvider.LoadData(towerInstance));
      var data = problem.ProblemData;
      var gbmState = GradientBoostedTreesAlgorithmStatic.CreateGbmState(data, new SquaredErrorLoss(), randSeed: 31415, maxSize: 100, r: 0.5, m: 1, nu: 1);
      GradientBoostedTreesAlgorithmStatic.MakeStep(gbmState);

      // Models[0] is presumably the constant/initial model; take the first tree after it.
      var ensemble = ((IGradientBoostedTreesModel)gbmState.GetModel());
      var tree = ensemble.Models.Skip(1).First();
      var expected = tree.ToString();
      using (var writeStream = new MemoryStream()) {
        Persistence.Default.Xml.XmlGenerator.Serialize(tree, writeStream);
        // GetBuffer() returns the raw internal buffer (may include trailing
        // zero padding); kept as-is to match the original behavior.
        var bytes = writeStream.GetBuffer();
        using (var readStream = new MemoryStream(bytes)) {
          var roundTripped = Persistence.Default.Xml.XmlParser.Deserialize(readStream);
          var actual = roundTripped.ToString();
          Assert.AreEqual(expected, actual);
        }
      }
    }
    public void TestDecisionTreePartialDependence() {
      // Trains a small GBT ensemble on the "Tower" instance, then prints the
      // estimated values over the sorted values of the most relevant input
      // variable (a partial dependence trace).
      var dataProvider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
      var towerInstance = dataProvider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
      var problem = new RegressionProblem();
      problem.Load(dataProvider.LoadData(towerInstance));
      var data = problem.ProblemData;
      var gbmState = GradientBoostedTreesAlgorithmStatic.CreateGbmState(data, new SquaredErrorLoss(), randSeed: 31415, maxSize: 10, r: 0.5, m: 1, nu: 0.02);
      // Run 1000 boosting iterations.
      for (int iteration = 0; iteration < 1000; iteration++) {
        GradientBoostedTreesAlgorithmStatic.MakeStep(gbmState);
      }

      var mostImportantVar = gbmState.GetVariableRelevance().OrderByDescending(kvp => kvp.Value).First();
      Console.WriteLine("var: {0} relevance: {1}", mostImportantVar.Key, mostImportantVar.Value);
      var ensemble = ((IGradientBoostedTreesModel)gbmState.GetModel());
      var secondTree = ensemble.Models.Skip(1).First();
      Console.WriteLine(secondTree.ToString());
      Console.WriteLine();

      // Sorted values of the most relevant variable become a single-column dataset.
      var sortedValues = data.Dataset.GetDoubleValues(mostImportantVar.Key).OrderBy(x => x).ToArray();
      var partialDs = new ModifiableDataset(new string[] { mostImportantVar.Key },
        new IList[] { sortedValues.ToList<double>() });

      var estimates = ensemble.GetEstimatedValues(partialDs, Enumerable.Range(0, sortedValues.Length)).ToArray();

      // Print every 10th (value, estimate) pair.
      for (int row = 0; row < sortedValues.Length; row += 10) {
        Console.WriteLine("{0,-5:N3} {1,-5:N3}", sortedValues[row], estimates[row]);
      }
    }
    public GradientBoostedTreesAlgorithm() {
      // Sets up the default problem and all user-visible parameters of the
      // gradient boosted trees algorithm.
      // NOTE(review): parameter registration order and the Hidden flags are kept
      // as-is; Hidden is set immediately after the parameter is added.
      Problem = new RegressionProblem(); // default problem

      // Core boosting parameters: iterations, seeding, tree size, row/variable
      // subsampling ratios, and the learning rate (valid ranges in descriptions).
      Parameters.Add(new FixedValueParameter<IntValue>(IterationsParameterName, "Number of iterations (set as high as possible, adjust in combination with nu, when increasing iterations also decrease nu)", new IntValue(1000)));
      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaxSizeParameterName, "Maximal size of the tree learned in each step (prefer smaller sizes if possible)", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(RParameterName, "Ratio of training rows selected randomly in each step (0 < R <= 1)", new DoubleValue(0.5)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(MParameterName, "Ratio of variables selected randomly in each step (0 < M <= 1)", new DoubleValue(0.5)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(NuParameterName, "Learning rate nu (step size for the gradient update, should be small 0 < nu < 0.1)", new DoubleValue(0.002)));
      // UI/output switches; hidden from the default parameter view.
      Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, "", new IntValue(100)));
      Parameters[UpdateIntervalParameterName].Hidden = true;
      Parameters.Add(new FixedValueParameter<BoolValue>(CreateSolutionParameterName, "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true)));
      Parameters[CreateSolutionParameterName].Hidden = true;

      // Loss functions are discovered via the plugin/application manager; the
      // squared-error loss is selected as the default.
      var lossFunctions = ApplicationManager.Manager.GetInstances<ILossFunction>();
      Parameters.Add(new ConstrainedValueParameter<ILossFunction>(LossFunctionParameterName, "The loss function", new ItemSet<ILossFunction>(lossFunctions)));
      LossFunctionParameter.Value = LossFunctionParameter.ValidValues.First(f => f.ToString().Contains("Squared")); // squared error loss is the default
    }
 public NearestNeighbourRegression()
     : base()
 {
     // Register the neighbourhood-size parameter (default k = 3) and attach a
     // default regression problem.
     var kParameter = new FixedValueParameter<IntValue>(
         KParameterName,
         "The number of nearest neighbours to consider for regression.",
         new IntValue(3));
     Parameters.Add(kParameter);
     Problem = new RegressionProblem();
 }
 public LinearRegression()
     : base()
 {
     Problem = new RegressionProblem(); // default problem; callers may replace it
 }