private GaussianProcessRegression CreateGaussianProcessRegressionSample() {
  // Creates a preconfigured GPR sample that solves the "Spatial co-evolution"
  // benchmark regression problem.
  var gpr = new GaussianProcessRegression();
  var provider = new VariousInstanceProvider();
  // Single(predicate) instead of Where(predicate).Single(): same semantics
  // (throws unless exactly one descriptor matches), and consistent with the
  // other methods in this file.
  var instance = provider.GetDataDescriptors().Single(x => x.Name.Contains("Spatial co-evolution"));
  var regProblem = new RegressionProblem();
  regProblem.Load(provider.LoadData(instance));
  #region Algorithm Configuration
  gpr.Name = "Gaussian Process Regression";
  gpr.Description = "A Gaussian process regression algorithm which solves the spatial co-evolution benchmark problem";
  gpr.Problem = regProblem;
  gpr.CovarianceFunction = new CovarianceSquaredExponentialIso();
  gpr.MeanFunction = new MeanConst();
  gpr.MinimizationIterations = 20;
  gpr.Seed = 0;
  gpr.SetSeedRandomly = true;
  #endregion
  gpr.Engine = new ParallelEngine.ParallelEngine();
  return gpr;
}
private GaussianProcessRegression CreateGaussianProcessRegressionSample() {
  // Builds the GPR sample for the "Spatial co-evolution" benchmark problem.
  var problemProvider = new VariousInstanceProvider();
  var descriptor = problemProvider.GetDataDescriptors()
    .Where(d => d.Name.Contains("Spatial co-evolution"))
    .Single();
  var problem = new RegressionProblem();
  problem.Load(problemProvider.LoadData(descriptor));
  #region Algorithm Configuration
  var gpr = new GaussianProcessRegression {
    Name = "Gaussian Process Regression",
    Description = "A Gaussian process regression algorithm which solves the spatial co-evolution benchmark problem",
    Problem = problem,
    CovarianceFunction = new CovarianceSquaredExponentialIso(),
    MeanFunction = new MeanConst(),
    MinimizationIterations = 20,
    Seed = 0,
    SetSeedRandomly = true
  };
  #endregion
  gpr.Engine = new ParallelEngine.ParallelEngine();
  return gpr;
}
public void GradientBoostingTestTowerSquaredError() {
  // Regression test: GBT with the default (squared error) loss on the "Tower"
  // dataset must reproduce the recorded train/test losses (fixed seed,
  // random seeding disabled, so the run is deterministic).
  var gbt = new GradientBoostedTreesAlgorithm();
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var instance = provider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
  var regProblem = new RegressionProblem();
  regProblem.Load(provider.LoadData(instance));
  #region Algorithm Configuration
  gbt.Problem = regProblem;
  gbt.Seed = 0;
  gbt.SetSeedRandomly = false;
  gbt.Iterations = 5000;
  gbt.MaxSize = 20;
  gbt.CreateSolution = false;
  #endregion
  // Run through the shared RunAlgorithm helper, consistent with the other GBT
  // tests in this file, instead of calling gbt.Start() directly and reading
  // Results immediately (presumably RunAlgorithm blocks until the run has
  // finished — verify against its definition).
  RunAlgorithm(gbt);
  Console.WriteLine(gbt.ExecutionTime);
  Assert.AreEqual(267.68704241153921, ((DoubleValue)gbt.Results["Loss (train)"].Value).Value, 1E-6);
  Assert.AreEqual(393.84704062205469, ((DoubleValue)gbt.Results["Loss (test)"].Value).Value, 1E-6);
}
public void TestDecisionTreePartialDependence() {
  // Trains a GBT ensemble on the "Tower" dataset and prints the partial
  // dependence of the prediction on the most relevant input variable
  // (console output for manual inspection).
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var towerInstance = provider.GetDataDescriptors().Single(d => d.Name.Contains("Tower"));
  var regProblem = new RegressionProblem();
  regProblem.Load(provider.LoadData(towerInstance));
  var problemData = regProblem.ProblemData;
  var state = GradientBoostedTreesAlgorithmStatic.CreateGbmState(problemData, new SquaredErrorLoss(), randSeed: 31415, maxSize: 10, r: 0.5, m: 1, nu: 0.02);
  var step = 0;
  while (step < 1000) {
    GradientBoostedTreesAlgorithmStatic.MakeStep(state);
    step++;
  }

  var mostImportantVar = state.GetVariableRelevance().OrderByDescending(kvp => kvp.Value).First();
  Console.WriteLine("var: {0} relevance: {1}", mostImportantVar.Key, mostImportantVar.Value);
  var model = (IGradientBoostedTreesModel)state.GetModel();
  var treeM = model.Models.Skip(1).First();
  Console.WriteLine(treeM.ToString());
  Console.WriteLine();

  // Evaluate the model on the sorted values of the most relevant variable.
  var sortedValues = problemData.Dataset.GetDoubleValues(mostImportantVar.Key).OrderBy(v => v).ToArray();
  var ds = new ModifiableDataset(new[] { mostImportantVar.Key }, new IList[] { sortedValues.ToList() });
  var estValues = model.GetEstimatedValues(ds, Enumerable.Range(0, sortedValues.Length)).ToArray();
  for (int i = 0; i < sortedValues.Length; i += 10) {
    Console.WriteLine("{0,-5:N3} {1,-5:N3}", sortedValues[i], estValues[i]);
  }
}
// Registers the default problem, the set of selectable base learners, and all
// user-configurable parameters of the gradient boosting meta-algorithm.
public GradientBoostingRegressionAlgorithm() {
  Problem = new RegressionProblem(); // default problem

  // Base learners offered to the user; OSGP is the default selection.
  var osgp = CreateOSGP();
  var regressionAlgs = new ItemSet<IAlgorithm>(new IAlgorithm[] {
    new RandomForestRegression(),
    osgp,
  });
  foreach (var alg in regressionAlgs) alg.Prepare();

  Parameters.Add(new FixedValueParameter<IntValue>(IterationsParameterName, "Number of iterations", new IntValue(100)));
  Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
  Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));
  Parameters.Add(new FixedValueParameter<DoubleValue>(NuParameterName, "The learning rate nu when updating predictions in GBM (0 < nu <= 1)", new DoubleValue(0.5)));
  Parameters.Add(new FixedValueParameter<DoubleValue>(RParameterName, "The fraction of rows that are sampled randomly for the base learner in each iteration (0 < r <= 1)", new DoubleValue(1)));
  Parameters.Add(new FixedValueParameter<DoubleValue>(MParameterName, "The fraction of variables that are sampled randomly for the base learner in each iteration (0 < m <= 1)", new DoubleValue(0.5)));
  Parameters.Add(new ConstrainedValueParameter<IAlgorithm>(RegressionAlgorithmParameterName, "The regression algorithm to use as a base learner", regressionAlgs, osgp));
  // The remaining parameters are hidden from the default parameter view.
  Parameters.Add(new FixedValueParameter<StringValue>(RegressionAlgorithmSolutionResultParameterName, "The name of the solution produced by the regression algorithm", new StringValue("Solution")) { Hidden = true });
  Parameters.Add(new FixedValueParameter<BoolValue>(CreateSolutionParameterName, "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true)) { Hidden = true });
  Parameters.Add(new FixedValueParameter<BoolValue>(StoreRunsParameterName, "Flag that indicates if the results of the individual runs should be stored for detailed analysis", new BoolValue(false)) { Hidden = true });
}
public void GradientBoostingTestTowerRelativeError() {
  // Regression test: GBT with the relative-error loss on the "Tower" dataset
  // must reproduce the recorded train/test losses (fixed seed, deterministic).
  var gbt = new GradientBoostedTreesAlgorithm();
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var towerDescriptor = provider.GetDataDescriptors().Single(d => d.Name.Contains("Tower"));
  var regProblem = new RegressionProblem();
  regProblem.Load(provider.LoadData(towerDescriptor));
  #region Algorithm Configuration
  gbt.Problem = regProblem;
  gbt.Seed = 0;
  gbt.SetSeedRandomly = false;
  gbt.Iterations = 3000;
  gbt.MaxSize = 20;
  gbt.Nu = 0.005;
  // Pick the relative-error loss from the registered loss functions.
  gbt.LossFunctionParameter.Value = gbt.LossFunctionParameter.ValidValues.First(l => l.ToString().Contains("Relative"));
  gbt.CreateSolution = false;
  #endregion
  RunAlgorithm(gbt);
  Console.WriteLine(gbt.ExecutionTime);
  Assert.AreEqual(0.061954221604374943, ((DoubleValue)gbt.Results["Loss (train)"].Value).Value, 1E-6);
  Assert.AreEqual(0.06316303473499961, ((DoubleValue)gbt.Results["Loss (test)"].Value).Value, 1E-6);
}
public void GradientBoostingTestTowerSquaredError() {
  // Regression test: GBT with the default (squared error) loss on the "Tower"
  // dataset must reproduce the recorded train/test losses (fixed seed, deterministic).
  var gbt = new GradientBoostedTreesAlgorithm();
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var towerDescriptor = provider.GetDataDescriptors().Single(d => d.Name.Contains("Tower"));
  var regProblem = new RegressionProblem();
  regProblem.Load(provider.LoadData(towerDescriptor));
  #region Algorithm Configuration
  gbt.Problem = regProblem;
  gbt.Seed = 0;
  gbt.SetSeedRandomly = false;
  gbt.Iterations = 5000;
  gbt.MaxSize = 20;
  gbt.CreateSolution = false;
  #endregion
  RunAlgorithm(gbt);
  Console.WriteLine(gbt.ExecutionTime);
  Assert.AreEqual(267.68704241153921, ((DoubleValue)gbt.Results["Loss (train)"].Value).Value, 1E-6);
  Assert.AreEqual(393.84704062205469, ((DoubleValue)gbt.Results["Loss (test)"].Value).Value, 1E-6);
}
public void TestDecisionTreePersistence() {
  // Round-trip test: a GBT regression tree serialized to XML and deserialized
  // again must have an identical string representation.
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var instance = provider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
  var regProblem = new RegressionProblem();
  regProblem.Load(provider.LoadData(instance));
  var problemData = regProblem.ProblemData;
  var state = GradientBoostedTreesAlgorithmStatic.CreateGbmState(problemData, new SquaredErrorLoss(), randSeed: 31415, maxSize: 100, r: 0.5, m: 1, nu: 1);
  GradientBoostedTreesAlgorithmStatic.MakeStep(state);

  var model = (IGradientBoostedTreesModel)state.GetModel();
  var treeM = model.Models.Skip(1).First(); // take the second model of the ensemble
  var origStr = treeM.ToString();
  using (var memStream = new MemoryStream()) {
    Persistence.Default.Xml.XmlGenerator.Serialize(treeM, memStream);
    // ToArray() (not GetBuffer()) — GetBuffer() returns the whole internal
    // buffer, which is usually larger than the written content and would pad
    // the restore stream with unused trailing bytes.
    var buf = memStream.ToArray();
    using (var restoreStream = new MemoryStream(buf)) {
      var restoredTree = Persistence.Default.Xml.XmlParser.Deserialize(restoreStream);
      var restoredStr = restoredTree.ToString();
      Assert.AreEqual(origStr, restoredStr);
    }
  }
}
public void TestDecisionTreePartialDependence() {
  // Trains a GBT ensemble on the "Tower" dataset and prints the partial
  // dependence of the prediction on the most relevant input variable
  // (console output for manual inspection).
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var instance = provider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
  var regProblem = new RegressionProblem();
  regProblem.Load(provider.LoadData(instance));
  var problemData = regProblem.ProblemData;
  var state = GradientBoostedTreesAlgorithmStatic.CreateGbmState(problemData, new SquaredErrorLoss(), randSeed: 31415, maxSize: 10, r: 0.5, m: 1, nu: 0.02);
  for (int iteration = 0; iteration < 1000; iteration++) {
    GradientBoostedTreesAlgorithmStatic.MakeStep(state);
  }

  var relevanceRanking = state.GetVariableRelevance().OrderByDescending(kvp => kvp.Value);
  var mostImportantVar = relevanceRanking.First();
  Console.WriteLine("var: {0} relevance: {1}", mostImportantVar.Key, mostImportantVar.Value);
  var model = (IGradientBoostedTreesModel)state.GetModel();
  var treeM = model.Models.Skip(1).First();
  Console.WriteLine(treeM.ToString());
  Console.WriteLine();

  // Evaluate the model on the sorted values of the most relevant variable.
  var mostImportantVarValues = problemData.Dataset.GetDoubleValues(mostImportantVar.Key).OrderBy(x => x).ToArray();
  var ds = new ModifiableDataset(new[] { mostImportantVar.Key }, new IList[] { mostImportantVarValues.ToList() });
  var rows = Enumerable.Range(0, mostImportantVarValues.Length);
  var estValues = model.GetEstimatedValues(ds, rows).ToArray();
  for (int i = 0; i < mostImportantVarValues.Length; i += 10) {
    Console.WriteLine("{0,-5:N3} {1,-5:N3}", mostImportantVarValues[i], estValues[i]);
  }
}
// Registers the default problem and all parameters of the gradient boosted
// trees algorithm; squared error is selected as the default loss function.
public GradientBoostedTreesAlgorithm() {
  Problem = new RegressionProblem(); // default problem

  Parameters.Add(new FixedValueParameter<IntValue>(IterationsParameterName, "Number of iterations (set as high as possible, adjust in combination with nu, when increasing iterations also decrease nu)", new IntValue(1000)));
  Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
  Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));
  Parameters.Add(new FixedValueParameter<IntValue>(MaxSizeParameterName, "Maximal size of the tree learned in each step (prefer smaller sizes if possible)", new IntValue(10)));
  Parameters.Add(new FixedValueParameter<DoubleValue>(RParameterName, "Ratio of training rows selected randomly in each step (0 < R <= 1)", new DoubleValue(0.5)));
  Parameters.Add(new FixedValueParameter<DoubleValue>(MParameterName, "Ratio of variables selected randomly in each step (0 < M <= 1)", new DoubleValue(0.5)));
  Parameters.Add(new FixedValueParameter<DoubleValue>(NuParameterName, "Learning rate nu (step size for the gradient update, should be small 0 < nu < 0.1)", new DoubleValue(0.002)));
  // Hidden parameters are not shown in the default parameter view.
  Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, "", new IntValue(100)) { Hidden = true });
  Parameters.Add(new FixedValueParameter<BoolValue>(CreateSolutionParameterName, "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true)) { Hidden = true });

  // Offer every registered loss function; squared error loss is the default.
  var lossFunctions = ApplicationManager.Manager.GetInstances<ILossFunction>();
  Parameters.Add(new ConstrainedValueParameter<ILossFunction>(LossFunctionParameterName, "The loss function", new ItemSet<ILossFunction>(lossFunctions)));
  LossFunctionParameter.Value = LossFunctionParameter.ValidValues.First(f => f.ToString().Contains("Squared"));
}
// Creates a nearest-neighbour regression algorithm with k = 3 and a default
// regression problem.
public NearestNeighbourRegression()
  : base() {
  Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
  Problem = new RegressionProblem(); // default problem
}
// Creates a linear regression algorithm preconfigured with a default
// regression problem.
public LinearRegression()
  : base() {
  Problem = new RegressionProblem(); // default problem
}