// Regression test: trains gradient boosted trees on the real-world instance whose
// name contains "Tower", using the loss function whose string form contains
// "Relative", and compares training/test loss with stored reference values.
public void GradientBoostingTestTowerRelativeError() {
  var algorithm = new GradientBoostedTreesAlgorithm();

  // Locate and load the benchmark data set.
  var instanceProvider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var towerDescriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("Tower"));
  var problem = new RegressionProblem();
  var towerData = instanceProvider.LoadData(towerDescriptor);
  problem.Load(towerData);

  #region Algorithm Configuration
  algorithm.Problem = problem;
  // Fixed seed so that the reference losses below are reproducible.
  algorithm.Seed = 0;
  algorithm.SetSeedRandomly = false;
  algorithm.Iterations = 3000;
  algorithm.MaxSize = 20;
  algorithm.Nu = 0.005;
  var relativeLoss = algorithm.LossFunctionParameter.ValidValues.First(loss => loss.ToString().Contains("Relative"));
  algorithm.LossFunctionParameter.Value = relativeLoss;
  algorithm.CreateSolution = false;
  #endregion

  algorithm.Start();
  Console.WriteLine(algorithm.ExecutionTime);

  // Both losses must match the reference values within an absolute tolerance of 1E-6.
  var trainLoss = (DoubleValue)algorithm.Results["Loss (train)"].Value;
  Assert.AreEqual(0.061954221604374943, trainLoss.Value, 1E-6);
  var testLoss = (DoubleValue)algorithm.Results["Loss (test)"].Value;
  Assert.AreEqual(0.06316303473499961, testLoss.Value, 1E-6);
}
// Round-trips one model of a gradient boosted trees ensemble through the XML
// persistence layer and checks that the restored object has the same string
// representation as the original.
public void TestDecisionTreePersistence() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var instance = provider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
  var regProblem = new RegressionProblem();
  regProblem.Load(provider.LoadData(instance));
  var problemData = regProblem.ProblemData;

  // A single boosting step with squared error loss and a fixed seed.
  var state = GradientBoostedTreesAlgorithmStatic.CreateGbmState(problemData, new SquaredErrorLoss(), randSeed: 31415, maxSize: 100, r: 0.5, m: 1, nu: 1);
  GradientBoostedTreesAlgorithmStatic.MakeStep(state);

  var model = ((IGradientBoostedTreesModel)state.GetModel());
  // The second entry of model.Models is the one that gets persisted.
  var treeM = model.Models.Skip(1).First();
  var origStr = treeM.ToString();

  using (var memStream = new MemoryStream()) {
    Persistence.Default.Xml.XmlGenerator.Serialize(treeM, memStream);
    // ToArray() copies exactly the bytes that were written (and still works if the
    // serializer closed the stream). GetBuffer() would hand out the whole internal
    // buffer, including unused capacity after the serialized data.
    var buf = memStream.ToArray();
    using (var restoreStream = new MemoryStream(buf)) {
      var restoredTree = Persistence.Default.Xml.XmlParser.Deserialize(restoreStream);
      var restoredStr = restoredTree.ToString();
      Assert.AreEqual(origStr, restoredStr);
    }
  }
}
// Regression test: trains gradient boosted trees on the real-world instance whose
// name contains "Tower", using the loss function whose string form contains
// "Absolute", and compares training/test loss with stored reference values.
public void GradientBoostingTestTowerAbsoluteError() {
  var algorithm = new GradientBoostedTreesAlgorithm();

  // Locate and load the benchmark data set.
  var instanceProvider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var towerDescriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("Tower"));
  var problem = new RegressionProblem();
  var towerData = instanceProvider.LoadData(towerDescriptor);
  problem.Load(towerData);

  #region Algorithm Configuration
  algorithm.Problem = problem;
  // Fixed seed so that the reference losses below are reproducible.
  algorithm.Seed = 0;
  algorithm.SetSeedRandomly = false;
  algorithm.Iterations = 1000;
  algorithm.MaxSize = 20;
  algorithm.Nu = 0.02;
  var absoluteLoss = algorithm.LossFunctionParameter.ValidValues.First(loss => loss.ToString().Contains("Absolute"));
  algorithm.LossFunctionParameter.Value = absoluteLoss;
  algorithm.ModelCreation = GradientBoostedTrees.ModelCreation.QualityOnly;
  #endregion

  algorithm.Start();
  Console.WriteLine(algorithm.ExecutionTime);

  // Both losses must match the reference values within an absolute tolerance of 1E-6.
  var trainLoss = (DoubleValue)algorithm.Results["Loss (train)"].Value;
  Assert.AreEqual(10.551385044666661, trainLoss.Value, 1E-6);
  var testLoss = (DoubleValue)algorithm.Results["Loss (test)"].Value;
  Assert.AreEqual(12.918001745581172, testLoss.Value, 1E-6);
}
// Regression test: trains gradient boosted trees on the real-world instance whose
// name contains "Tower" without overriding the loss function parameter, and
// compares training/test loss with stored reference values.
public void GradientBoostingTestTowerSquaredError() {
  var algorithm = new GradientBoostedTreesAlgorithm();

  // Locate and load the benchmark data set.
  var instanceProvider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var towerDescriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("Tower"));
  var problem = new RegressionProblem();
  var towerData = instanceProvider.LoadData(towerDescriptor);
  problem.Load(towerData);

  #region Algorithm Configuration
  algorithm.Problem = problem;
  // Fixed seed so that the reference losses below are reproducible.
  algorithm.Seed = 0;
  algorithm.SetSeedRandomly = false;
  algorithm.Iterations = 5000;
  algorithm.MaxSize = 20;
  algorithm.CreateSolution = false;
  #endregion

  algorithm.Start();
  Console.WriteLine(algorithm.ExecutionTime);

  // Both losses must match the reference values within an absolute tolerance of 1E-6.
  var trainLoss = (DoubleValue)algorithm.Results["Loss (train)"].Value;
  Assert.AreEqual(267.68704241153921, trainLoss.Value, 1E-6);
  var testLoss = (DoubleValue)algorithm.Results["Loss (test)"].Value;
  Assert.AreEqual(393.84704062205469, testLoss.Value, 1E-6);
}
// Builds a gradient boosted trees model on the "Tower" instance with 1000 boosting
// steps, determines the variable with the highest relevance and prints the model's
// estimates over the sorted values of that variable. The test only writes to the
// console; it contains no assertions.
public void TestDecisionTreePartialDependence() {
  var instanceProvider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var towerDescriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("Tower"));
  var problem = new RegressionProblem();
  problem.Load(instanceProvider.LoadData(towerDescriptor));
  var data = problem.ProblemData;

  var gbmState = GradientBoostedTreesAlgorithmStatic.CreateGbmState(data, new SquaredErrorLoss(), randSeed: 31415, maxSize: 10, r: 0.5, m: 1, nu: 0.02);
  // Perform 1000 boosting steps.
  int stepsLeft = 1000;
  while (stepsLeft-- > 0) {
    GradientBoostedTreesAlgorithmStatic.MakeStep(gbmState);
  }

  // Entry with the largest relevance value.
  var topVariable = gbmState.GetVariableRelevance().OrderByDescending(entry => entry.Value).First();
  Console.WriteLine("var: {0} relevance: {1}", topVariable.Key, topVariable.Value);

  var ensemble = (IGradientBoostedTreesModel)gbmState.GetModel();
  // Print the second entry of the ensemble's model list.
  var secondModel = ensemble.Models.Skip(1).First();
  Console.WriteLine(secondModel.ToString());
  Console.WriteLine();

  // Data set that contains only the most relevant variable, in ascending order.
  var sortedValues = data.Dataset.GetDoubleValues(topVariable.Key).OrderBy(v => v).ToArray();
  var variableNames = new string[] { topVariable.Key };
  var columns = new IList[] { sortedValues.ToList<double>() };
  var singleVariableDataset = new ModifiableDataset(variableNames, columns);
  var allRows = Enumerable.Range(0, sortedValues.Length);
  var estimates = ensemble.GetEstimatedValues(singleVariableDataset, allRows).ToArray();

  // Print every tenth (value, estimate) pair.
  for (int row = 0; row < sortedValues.Length; row += 10) {
    Console.WriteLine("{0,-5:N3} {1,-5:N3}", sortedValues[row], estimates[row]);
  }
}
// Regression test: trains gradient boosted trees (via the RunAlgorithm helper) on the
// real-world instance whose name contains "Tower", using the loss function whose
// string form contains "Relative", and compares training/test loss with stored
// reference values.
public void GradientBoostingTestTowerRelativeError() {
  var algorithm = new GradientBoostedTreesAlgorithm();

  // Locate and load the benchmark data set.
  var instanceProvider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var towerDescriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("Tower"));
  var problem = new RegressionProblem();
  var towerData = instanceProvider.LoadData(towerDescriptor);
  problem.Load(towerData);

  #region Algorithm Configuration
  algorithm.Problem = problem;
  // Fixed seed so that the reference losses below are reproducible.
  algorithm.Seed = 0;
  algorithm.SetSeedRandomly = false;
  algorithm.Iterations = 3000;
  algorithm.MaxSize = 20;
  algorithm.Nu = 0.005;
  var relativeLoss = algorithm.LossFunctionParameter.ValidValues.First(loss => loss.ToString().Contains("Relative"));
  algorithm.LossFunctionParameter.Value = relativeLoss;
  algorithm.CreateSolution = false;
  #endregion

  RunAlgorithm(algorithm);
  Console.WriteLine(algorithm.ExecutionTime);

  // Both losses must match the reference values within an absolute tolerance of 1E-6.
  var trainLoss = (DoubleValue)algorithm.Results["Loss (train)"].Value;
  Assert.AreEqual(0.061954221604374943, trainLoss.Value, 1E-6);
  var testLoss = (DoubleValue)algorithm.Results["Loss (test)"].Value;
  Assert.AreEqual(0.06316303473499961, testLoss.Value, 1E-6);
}
// Regression test: trains gradient boosted trees (via the RunAlgorithm helper) on the
// real-world instance whose name contains "Tower" without overriding the loss
// function parameter, and compares training/test loss with stored reference values.
public void GradientBoostingTestTowerSquaredError() {
  var algorithm = new GradientBoostedTreesAlgorithm();

  // Locate and load the benchmark data set.
  var instanceProvider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var towerDescriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("Tower"));
  var problem = new RegressionProblem();
  var towerData = instanceProvider.LoadData(towerDescriptor);
  problem.Load(towerData);

  #region Algorithm Configuration
  algorithm.Problem = problem;
  // Fixed seed so that the reference losses below are reproducible.
  algorithm.Seed = 0;
  algorithm.SetSeedRandomly = false;
  algorithm.Iterations = 5000;
  algorithm.MaxSize = 20;
  algorithm.CreateSolution = false;
  #endregion

  RunAlgorithm(algorithm);
  Console.WriteLine(algorithm.ExecutionTime);

  // Both losses must match the reference values within an absolute tolerance of 1E-6.
  var trainLoss = (DoubleValue)algorithm.Results["Loss (train)"].Value;
  Assert.AreEqual(267.68704241153921, trainLoss.Value, 1E-6);
  var testLoss = (DoubleValue)algorithm.Results["Loss (test)"].Value;
  Assert.AreEqual(393.84704062205469, testLoss.Value, 1E-6);
}
// Builds a gradient boosted trees model on the "Tower" instance with 1000 boosting
// steps, determines the variable with the highest relevance and prints the model's
// estimates over the sorted values of that variable. The test only writes to the
// console; it contains no assertions.
public void TestDecisionTreePartialDependence() {
  var instanceProvider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var towerDescriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("Tower"));
  var problem = new RegressionProblem();
  problem.Load(instanceProvider.LoadData(towerDescriptor));
  var data = problem.ProblemData;

  var gbmState = GradientBoostedTreesAlgorithmStatic.CreateGbmState(data, new SquaredErrorLoss(), randSeed: 31415, maxSize: 10, r: 0.5, m: 1, nu: 0.02);
  // Perform 1000 boosting steps.
  for (int step = 1; step <= 1000; step++) {
    GradientBoostedTreesAlgorithmStatic.MakeStep(gbmState);
  }

  // Entry with the largest relevance value.
  var relevances = gbmState.GetVariableRelevance();
  var topVariable = relevances.OrderByDescending(entry => entry.Value).First();
  Console.WriteLine("var: {0} relevance: {1}", topVariable.Key, topVariable.Value);

  var ensemble = (IGradientBoostedTreesModel)gbmState.GetModel();
  // Print the second entry of the ensemble's model list.
  var secondModel = ensemble.Models.Skip(1).First();
  Console.WriteLine(secondModel.ToString());
  Console.WriteLine();

  // Data set that contains only the most relevant variable, in ascending order.
  var sortedValues = data.Dataset.GetDoubleValues(topVariable.Key).OrderBy(v => v).ToArray();
  var singleVariableDataset = new ModifiableDataset(
    new string[] { topVariable.Key },
    new IList[] { sortedValues.ToList<double>() });
  var allRows = Enumerable.Range(0, sortedValues.Length);
  var estimates = ensemble.GetEstimatedValues(singleVariableDataset, allRows).ToArray();

  // Print every tenth (value, estimate) pair.
  for (int row = 0; row < sortedValues.Length; row += 10) {
    Console.WriteLine("{0,-5:N3} {1,-5:N3}", sortedValues[row], estimates[row]);
  }
}