/// <summary>
/// Registers the problem's parameters, hides four of them, installs a
/// <c>TypeCoherentExpressionGrammar</c> and a <c>SymbolicDataAnalysisExpressionTreeLinearInterpreter</c>,
/// copies the training partition bounds into the fitness calculation partition and finally
/// initializes operators, updates the grammar and registers event handlers.
/// </summary>
/// <param name="problemData">Initial value of the problem data parameter; its training partition is read here.</param>
/// <param name="evaluator">Forwarded to the base constructor.</param>
/// <param name="solutionCreator">Forwarded to the base constructor.</param>
protected SymbolicDataAnalysisProblem(T problemData, U evaluator, V solutionCreator)
  : base(evaluator, solutionCreator) {
  // Problem data, grammar and interpreter parameters (grammar and interpreter get their values further below).
  Parameters.Add(new ValueParameter<T>(ProblemDataParameterName, ProblemDataParameterDescription, problemData));
  Parameters.Add(new ValueParameter<ISymbolicDataAnalysisGrammar>(SymbolicExpressionTreeGrammarParameterName, SymbolicExpressionTreeGrammarParameterDescription));
  Parameters.Add(new ValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, SymoblicExpressionTreeInterpreterParameterDescription));

  // Tree size limits and function definition/argument limits.
  Parameters.Add(new FixedValueParameter<IntValue>(MaximumSymbolicExpressionTreeDepthParameterName, MaximumSymbolicExpressionTreeDepthParameterDescription));
  Parameters.Add(new FixedValueParameter<IntValue>(MaximumSymbolicExpressionTreeLengthParameterName, MaximumSymbolicExpressionTreeLengthParameterDescription));
  Parameters.Add(new FixedValueParameter<IntValue>(MaximumFunctionDefinitionsParameterName, MaximumFunctionDefinitionsParameterDescription));
  Parameters.Add(new FixedValueParameter<IntValue>(MaximumFunctionArgumentsParameterName, MaximumFunctionArgumentsParameterDescription));

  // Partitions and evaluation settings.
  Parameters.Add(new FixedValueParameter<IntRange>(FitnessCalculationPartitionParameterName, FitnessCalculationPartitionParameterDescription));
  Parameters.Add(new FixedValueParameter<IntRange>(ValidationPartitionParameterName, ValidationPartitionParameterDescription));
  Parameters.Add(new FixedValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, RelativeNumberOfEvaluatedSamplesParameterDescription, new PercentValue(1)));
  Parameters.Add(new FixedValueParameter<BoolValue>(ApplyLinearScalingParameterName, ApplyLinearScalingParameterDescription, new BoolValue(false)));

  // These four parameters are flagged as hidden.
  SymbolicExpressionTreeInterpreterParameter.Hidden = true;
  MaximumFunctionArgumentsParameter.Hidden = true;
  MaximumFunctionDefinitionsParameter.Hidden = true;
  ApplyLinearScalingParameter.Hidden = true;

  // Default grammar and interpreter.
  SymbolicExpressionTreeGrammar = new TypeCoherentExpressionGrammar();
  SymbolicExpressionTreeInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();

  // The fitness calculation partition starts out equal to the training partition.
  var trainingPartition = ProblemData.TrainingPartition;
  FitnessCalculationPartition.Start = trainingPartition.Start;
  FitnessCalculationPartition.End = trainingPartition.End;

  InitializeOperators();
  UpdateGrammar();
  RegisterEventHandlers();
}
/// <summary>
/// Builds a <c>TypeCoherentExpressionGrammar</c> configured as the default regression grammar,
/// disables the first <c>Variable</c> symbol found in it, and sets both the function-argument
/// and the function-definition limits to the range 0..3.
/// </summary>
/// <returns>The configured grammar.</returns>
public static ISymbolicExpressionGrammar CreateArithmeticAndAdfGrammar() {
  var grammar = new TypeCoherentExpressionGrammar();
  grammar.ConfigureAsDefaultRegressionGrammar();

  // Switch off the first Variable symbol of the default regression configuration.
  var firstVariableSymbol = grammar.Symbols.OfType<Variable>().First();
  firstVariableSymbol.Enabled = false;

  // Maximum is assigned before minimum, as in the original statement order.
  grammar.MaximumFunctionArguments = 3;
  grammar.MinimumFunctionArguments = 0;
  grammar.MaximumFunctionDefinitions = 3;
  grammar.MinimumFunctionDefinitions = 0;

  return grammar;
}
/// <summary>
/// Times the pairwise max-common-subtree similarity computation over a set of random trees
/// and prints the elapsed time, the average similarity and the throughput to the console.
/// </summary>
/// <remarks>
/// The random dataset and trees are generated from a <c>MersenneTwister</c> seeded with 31415.
/// Every unordered pair of trees is compared exactly once, so both the average similarity and
/// the calculations-per-second figure are based on <c>trees.Length * (trees.Length - 1) / 2</c>
/// comparisons.
/// </remarks>
public void MaxCommonSubtreeSimilarityCalculatorTestPerformance() {
  var grammar = new TypeCoherentExpressionGrammar();
  grammar.ConfigureAsDefaultRegressionGrammar();
  var twister = new MersenneTwister(31415);
  var ds = Util.CreateRandomDataset(twister, Rows, Columns);
  var trees = Util.CreateRandomTrees(twister, ds, grammar, N, 1, 100, 0, 0);

  double s = 0;
  var sw = new Stopwatch();
  sw.Start();
  for (int i = 0; i < trees.Length - 1; ++i) {
    for (int j = i + 1; j < trees.Length; ++j) {
      s += SymbolicExpressionTreeMaxCommonSubtreeSimilarityCalculator.MaxCommonSubtreeSimilarity(trees[i], trees[j], comparer);
    }
  }
  sw.Stop();

  // Number of (i, j) pairs with i < j that the loops above actually evaluated.
  // Computed as long so the product cannot overflow int for larger tree counts.
  long comparisons = (long)trees.Length * (trees.Length - 1) / 2;
  double elapsedSeconds = sw.ElapsedMilliseconds / 1000.0;

  Console.WriteLine("Elapsed time: " + elapsedSeconds + ", Avg. similarity: " + s / comparisons);
  // Throughput uses the same comparison count as the average; the previous N * (N + 1) / 2
  // over-reported the number of calculations that were performed.
  Console.WriteLine(comparisons / elapsedSeconds + " similarity calculations per second.");
}
/// <summary>
/// Builds a <c>GeneticAlgorithm</c> whose problem is a
/// <c>SymbolicClassificationSingleObjectiveProblem</c> on the "Mammography, M. Elter, 2007"
/// instance obtained from <c>UCIInstanceProvider</c>, with "Severity" as target variable.
/// </summary>
/// <returns>The configured algorithm.</returns>
private GeneticAlgorithm CreateGpSymbolicClassificationSample() {
  var ga = new GeneticAlgorithm();

  #region Problem Configuration
  var problem = new SymbolicClassificationSingleObjectiveProblem();
  problem.Name = "Mammography Classification Problem";
  problem.Description = "Mammography dataset imported from the UCI machine learning repository (http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass)";

  // load the problem instance
  var instanceProvider = new UCIInstanceProvider();
  var descriptor = instanceProvider.GetDataDescriptors()
    .Where(x => x.Name.Equals("Mammography, M. Elter, 2007"))
    .Single();
  var data = (ClassificationProblemData)instanceProvider.LoadData(descriptor);

  var targetParameter = data.TargetVariableParameter;
  targetParameter.Value = data.TargetVariableParameter.ValidValues.First(v => v.Value == "Severity");

  // select input variables (same order of calls as before: one unchecked, four checked, one unchecked)
  var inputs = data.InputVariables;
  inputs.SetItemCheckedState(inputs.Single(x => x.Value == "BI-RADS"), false);
  foreach (var checkedName in new[] { "Age", "Shape", "Margin", "Density" }) {
    inputs.SetItemCheckedState(inputs.Single(x => x.Value == checkedName), true);
  }
  inputs.SetItemCheckedState(inputs.Single(x => x.Value == "Severity"), false);

  data.TrainingPartition.Start = 0;
  data.TrainingPartition.End = 800;
  data.TestPartition.Start = 800;
  data.TestPartition.End = 961;
  data.Name = "Data imported from mammographic_masses.csv";
  data.Description = "Original dataset: http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass, missing values have been replaced with median values.";
  problem.ProblemData = data;

  // configure grammar
  var grammar = new TypeCoherentExpressionGrammar();
  grammar.ConfigureAsDefaultClassificationGrammar();
  var conditionSymbol = grammar.Symbols.OfType<VariableCondition>().Single();
  conditionSymbol.Enabled = false;

  var variableSymbol = grammar.Symbols.OfType<Variable>()
    .Where(x => !(x is LaggedVariable))
    .Single();
  variableSymbol.WeightMu = 1.0;
  variableSymbol.WeightSigma = 1.0;
  variableSymbol.WeightManipulatorMu = 0.0;
  variableSymbol.WeightManipulatorSigma = 0.05;
  variableSymbol.MultiplicativeWeightManipulatorSigma = 0.03;

  var constantSymbol = grammar.Symbols.OfType<Constant>().Single();
  constantSymbol.MaxValue = 20;
  constantSymbol.MinValue = -20;
  constantSymbol.ManipulatorMu = 0.0;
  constantSymbol.ManipulatorSigma = 1;
  constantSymbol.MultiplicativeManipulatorSigma = 0.03;
  problem.SymbolicExpressionTreeGrammar = grammar;

  // configure remaining problem parameters
  problem.BestKnownQuality.Value = 0.0;
  problem.FitnessCalculationPartition.Start = 0;
  problem.FitnessCalculationPartition.End = 400;
  problem.ValidationPartition.Start = 400;
  problem.ValidationPartition.End = 800;
  problem.RelativeNumberOfEvaluatedSamples.Value = 1;
  problem.MaximumSymbolicExpressionTreeLength.Value = 100;
  problem.MaximumSymbolicExpressionTreeDepth.Value = 10;
  problem.MaximumFunctionDefinitions.Value = 0;
  problem.MaximumFunctionArguments.Value = 0;
  problem.EvaluatorParameter.Value = new SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator();
  #endregion

  #region Algorithm Configuration
  ga.Problem = problem;
  ga.Name = "Genetic Programming - Symbolic Classification";
  ga.Description = "A standard genetic programming algorithm to solve a classification problem (Mammographic+Mass dataset)";
  SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
    ga, 1000, 1, 100, 0.15, 5);

  var mutator = (MultiSymbolicExpressionTreeManipulator)ga.Mutator;
  var fullTreeShaker = mutator.Operators.OfType<FullTreeShaker>().Single();
  fullTreeShaker.ShakingFactor = 0.1;
  var onePointShaker = mutator.Operators.OfType<OnePointShaker>().Single();
  onePointShaker.ShakingFactor = 1.0;

  // uncheck the overfitting analyzer and the first allele frequency analyzer
  var analyzers = ga.Analyzer.Operators;
  var overfittingAnalyzer = ga.Analyzer.Operators
    .OfType<SymbolicClassificationSingleObjectiveOverfittingAnalyzer>()
    .Single();
  analyzers.SetItemCheckedState(overfittingAnalyzer, false);

  analyzers = ga.Analyzer.Operators;
  var alleleFrequencyAnalyzer = ga.Analyzer.Operators
    .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
    .First();
  analyzers.SetItemCheckedState(alleleFrequencyAnalyzer, false);
  #endregion

  return ga;
}
/// <summary>
/// Constructs an instance from an existing one by forwarding both arguments to the
/// base-class constructor; this constructor itself copies no additional state.
/// </summary>
/// <param name="original">The instance passed on to the base constructor as the source.</param>
/// <param name="cloner">The cloner passed on to the base constructor.</param>
protected TypeCoherentExpressionGrammar(TypeCoherentExpressionGrammar original, Cloner cloner)
  : base(original, cloner) {
}
/// <summary>
/// Builds a <c>GeneticAlgorithm</c> whose problem is a
/// <c>SymbolicRegressionSingleObjectiveProblem</c> on the "Tower" instance obtained from
/// <c>RegressionRealWorldInstanceProvider</c>, with "towerResponse" as target variable.
/// </summary>
/// <returns>The configured algorithm.</returns>
private GeneticAlgorithm CreateGpSymbolicRegressionSample() {
  var ga = new GeneticAlgorithm();

  #region Problem Configuration
  var problem = new SymbolicRegressionSingleObjectiveProblem();
  problem.Name = "Tower Symbolic Regression Problem";
  problem.Description = "Tower Dataset (downloaded from: http://www.symbolicregression.com/?q=towerProblem)";

  // load the problem instance
  var instanceProvider = new RegressionRealWorldInstanceProvider();
  var descriptor = instanceProvider.GetDataDescriptors()
    .Where(x => x.Name.Equals("Tower"))
    .Single();
  var data = (RegressionProblemData)instanceProvider.LoadData(descriptor);

  var targetParameter = data.TargetVariableParameter;
  targetParameter.Value = data.TargetVariableParameter.ValidValues.First(v => v.Value == "towerResponse");

  // select input variables (same order of calls as before: "x1" checked, the rest unchecked)
  var inputs = data.InputVariables;
  inputs.SetItemCheckedState(inputs.Single(x => x.Value == "x1"), true);
  foreach (var uncheckedName in new[] { "x7", "x11", "x16", "x21", "x25", "towerResponse" }) {
    inputs.SetItemCheckedState(inputs.Single(x => x.Value == uncheckedName), false);
  }

  data.TrainingPartition.Start = 0;
  data.TrainingPartition.End = 3136;
  data.TestPartition.Start = 3136;
  data.TestPartition.End = 4999;
  data.Name = "Data imported from towerData.txt";
  data.Description = "Chemical concentration at top of distillation tower, dataset downloaded from: http://vanillamodeling.com/realproblems.html, best R² achieved with nu-SVR = 0.97";
  problem.ProblemData = data;

  // configure grammar
  var grammar = new TypeCoherentExpressionGrammar();
  grammar.ConfigureAsDefaultRegressionGrammar();
  var conditionSymbol = grammar.Symbols.OfType<VariableCondition>().Single();
  conditionSymbol.InitialFrequency = 0.0;

  var variableSymbol = grammar.Symbols.OfType<Variable>()
    .Where(x => !(x is LaggedVariable))
    .Single();
  variableSymbol.WeightMu = 1.0;
  variableSymbol.WeightSigma = 1.0;
  variableSymbol.WeightManipulatorMu = 0.0;
  variableSymbol.WeightManipulatorSigma = 0.05;
  variableSymbol.MultiplicativeWeightManipulatorSigma = 0.03;

  var constantSymbol = grammar.Symbols.OfType<Constant>().Single();
  constantSymbol.MaxValue = 20;
  constantSymbol.MinValue = -20;
  constantSymbol.ManipulatorMu = 0.0;
  constantSymbol.ManipulatorSigma = 1;
  constantSymbol.MultiplicativeManipulatorSigma = 0.03;
  problem.SymbolicExpressionTreeGrammar = grammar;

  // configure remaining problem parameters
  problem.BestKnownQuality.Value = 0.97;
  problem.FitnessCalculationPartition.Start = 0;
  problem.FitnessCalculationPartition.End = 2300;
  problem.ValidationPartition.Start = 2300;
  problem.ValidationPartition.End = 3136;
  problem.RelativeNumberOfEvaluatedSamples.Value = 1;
  problem.MaximumSymbolicExpressionTreeLength.Value = 150;
  problem.MaximumSymbolicExpressionTreeDepth.Value = 12;
  problem.MaximumFunctionDefinitions.Value = 0;
  problem.MaximumFunctionArguments.Value = 0;
  problem.EvaluatorParameter.Value = new SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator();
  #endregion

  #region Algorithm Configuration
  ga.Problem = problem;
  ga.Name = "Genetic Programming - Symbolic Regression";
  ga.Description = "A standard genetic programming algorithm to solve a symbolic regression problem (tower dataset)";
  SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
    ga, 1000, 1, 50, 0.15, 5);

  var mutator = (MultiSymbolicExpressionTreeManipulator)ga.Mutator;
  var fullTreeShaker = mutator.Operators.OfType<FullTreeShaker>().Single();
  fullTreeShaker.ShakingFactor = 0.1;
  var onePointShaker = mutator.Operators.OfType<OnePointShaker>().Single();
  onePointShaker.ShakingFactor = 1.0;

  // uncheck the overfitting analyzer and the first allele frequency analyzer
  var analyzers = ga.Analyzer.Operators;
  var overfittingAnalyzer = ga.Analyzer.Operators
    .OfType<SymbolicRegressionSingleObjectiveOverfittingAnalyzer>()
    .Single();
  analyzers.SetItemCheckedState(overfittingAnalyzer, false);

  analyzers = ga.Analyzer.Operators;
  var alleleFrequencyAnalyzer = ga.Analyzer.Operators
    .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
    .First();
  analyzers.SetItemCheckedState(alleleFrequencyAnalyzer, false);
  #endregion

  return ga;
}