/// <summary>
/// Folds low-impact nodes of a symbolic classification tree into constant nodes.
/// </summary>
/// <remarks>
/// All modifications are made on a clone of <paramref name="tree"/>. The nodes below
/// <c>Root.GetSubtree(0).GetSubtree(0)</c> are collected once in prefix order and then visited in that order.
/// Nodes that already are <c>ConstantTreeNode</c> instances are skipped.
/// </remarks>
/// <param name="tree">The tree to prune; it is cloned before any node is replaced.</param>
/// <param name="modelCreator">Creates the classification model that wraps the cloned tree.</param>
/// <param name="impactValuesCalculator">Supplies impact value, replacement value and new quality for each visited node.</param>
/// <param name="interpreter">Interpreter handed to the model creator.</param>
/// <param name="problemData">Problem data handed to the impact calculator.</param>
/// <param name="estimationLimits">Its <c>Lower</c> and <c>Upper</c> values are handed to the model creator.</param>
/// <param name="rows">Row indices handed to the impact calculator.</param>
/// <param name="nodeImpactThreshold">Nodes whose impact is not greater than this value are folded (only used when <paramref name="pruneOnlyZeroImpactNodes"/> is false).</param>
/// <param name="pruneOnlyZeroImpactNodes">When true, only nodes whose impact <c>IsAlmost(0.0)</c> are folded and the threshold is ignored.</param>
/// <returns>The <c>SymbolicExpressionTree</c> of the model created for the clone.</returns>
public static ISymbolicExpressionTree Prune(ISymbolicExpressionTree tree, ISymbolicClassificationModelCreator modelCreator, SymbolicClassificationSolutionImpactValuesCalculator impactValuesCalculator, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IClassificationProblemData problemData, DoubleLimit estimationLimits, IEnumerable<int> rows, double nodeImpactThreshold = 0.0, bool pruneOnlyZeroImpactNodes = false) {
  var workingTree = (ISymbolicExpressionTree)tree.Clone();
  var model = modelCreator.CreateSymbolicClassificationModel(workingTree, interpreter, estimationLimits.Lower, estimationLimits.Upper);

  // Snapshot of the candidate nodes; replacements below do not alter this list.
  var startNode = workingTree.Root.GetSubtree(0).GetSubtree(0);
  var candidates = startNode.IterateNodesPrefix().ToList();

  // Starts as NaN and is overwritten with the calculator's reported quality after every fold.
  double referenceQuality = double.NaN;

  int position = 0;
  while (position < candidates.Count) {
    var current = candidates[position];
    position++;

    if (current is ConstantTreeNode) {
      continue;
    }

    double impact, replacement, updatedQuality;
    impactValuesCalculator.CalculateImpactAndReplacementValues(model, current, problemData, rows, out impact, out replacement, out updatedQuality, referenceQuality);

    // Written as !(a > b) on purpose: a NaN impact must still count as foldable in threshold mode.
    bool foldable = pruneOnlyZeroImpactNodes
      ? impact.IsAlmost(0.0)
      : !(impact > nodeImpactThreshold);
    if (!foldable) {
      continue;
    }

    var replacementNode = (ConstantTreeNode)current.Grammar.GetSymbol("Constant").CreateTreeNode();
    replacementNode.Value = replacement;
    ReplaceWithConstant(current, replacementNode);

    // Skip the subtrees under the node that was folded (position already moved past the node itself).
    position += current.GetLength() - 1;
    referenceQuality = updatedQuality;
  }

  return model.SymbolicExpressionTree;
}
/// <summary>
/// Computes a weighted sum of the normalized mean squared error, the false negative rate and the
/// false positive rate of a symbolic classification tree on the given rows.
/// </summary>
/// <remarks>
/// The tree is interpreted on <paramref name="rows"/> and the outputs are limited to
/// [<paramref name="lowerEstimationLimit"/>, <paramref name="upperEstimationLimit"/>]. A model is created via
/// <paramref name="modelCreator"/>. If it is an <c>ISymbolicDiscriminantFunctionClassificationModel</c>, its
/// threshold calculator is run on the bounded outputs and the resulting thresholds and class values are set on the
/// model; otherwise <c>RecalculateModelParameters</c> is called. The false negative rate is derived as
/// <c>1 - TruePositiveRate</c>.
/// </remarks>
/// <param name="interpreter">Interpreter used to evaluate <paramref name="tree"/>.</param>
/// <param name="tree">The symbolic expression tree to evaluate.</param>
/// <param name="lowerEstimationLimit">Lower bound applied to the interpreted values and passed to the model creator.</param>
/// <param name="upperEstimationLimit">Upper bound applied to the interpreted values and passed to the model creator.</param>
/// <param name="problemData">Provides the dataset, the target variable and the positive class.</param>
/// <param name="rows">Row indices to evaluate. NOTE(review): enumerated by several calls; presumably safe to enumerate repeatedly - confirm with callers.</param>
/// <param name="applyLinearScaling">Must be false; linear scaling is not supported by this evaluator.</param>
/// <param name="modelCreator">Creates the classification model for <paramref name="tree"/>.</param>
/// <param name="normalizedMeanSquaredErrorWeightingFactor">Weight of the normalized mean squared error.</param>
/// <param name="falseNegativeRateWeightingFactor">Weight of the false negative rate.</param>
/// <param name="falsePositiveRateWeightingFactor">Weight of the false positive rate.</param>
/// <returns>
/// The weighted sum of the three measures, or <c>Double.NaN</c> if the performance measures calculator or the
/// NMSE calculator reports an error state other than <c>OnlineCalculatorError.None</c>.
/// </returns>
/// <exception cref="NotSupportedException">Thrown when <paramref name="applyLinearScaling"/> is true.</exception>
public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, ISymbolicClassificationModelCreator modelCreator, double normalizedMeanSquaredErrorWeightingFactor, double falseNegativeRateWeightingFactor, double falsePositiveRateWeightingFactor) {
  // Reject the unsupported option before doing any work, so the outcome does not depend on the data
  // (previously an error state in the performance calculator could mask this with a NaN result).
  if (applyLinearScaling) {
    throw new NotSupportedException("The Weighted Performance Measures Evaluator does not suppport linear scaling!");
  }

  var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, rows);
  var targetClassValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
  var boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit).ToArray();

  // Calculate performance measures.
  string positiveClassName = problemData.PositiveClass;
  IEnumerable<double> estimatedClassValues;

  var model = modelCreator.CreateSymbolicClassificationModel(problemData.TargetVariable, tree, interpreter, lowerEstimationLimit, upperEstimationLimit);
  var m = model as ISymbolicDiscriminantFunctionClassificationModel;
  if (m != null) {
    // Discriminant function models: derive thresholds from the already bounded outputs and classify those directly.
    double[] classValues, thresholds;
    m.ThresholdCalculator.Calculate(problemData, boundedEstimatedValues, targetClassValues, out classValues, out thresholds);
    m.SetThresholdsAndClassValues(thresholds, classValues);
    estimatedClassValues = m.GetEstimatedClassValues(boundedEstimatedValues);
  } else {
    // Other models: let the model recalculate its own parameters and classify from the dataset.
    model.RecalculateModelParameters(problemData, rows);
    estimatedClassValues = model.GetEstimatedClassValues(problemData.Dataset, rows);
  }

  var performanceCalculator = new ClassificationPerformanceMeasuresCalculator(positiveClassName, problemData.GetClassValue(positiveClassName));
  performanceCalculator.Calculate(targetClassValues, estimatedClassValues);
  if (performanceCalculator.ErrorState != OnlineCalculatorError.None) {
    return Double.NaN;
  }

  double falseNegativeRate = 1 - performanceCalculator.TruePositiveRate;
  double falsePositiveRate = performanceCalculator.FalsePositiveRate;

  OnlineCalculatorError errorState;
  double nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(targetClassValues, boundedEstimatedValues, out errorState);
  if (errorState != OnlineCalculatorError.None) {
    return Double.NaN;
  }

  return normalizedMeanSquaredErrorWeightingFactor * nmse
       + falseNegativeRateWeightingFactor * falseNegativeRate
       + falsePositiveRateWeightingFactor * falsePositiveRate;
}
/// <summary>
/// Returns a pruned tree in which nodes of little impact have been replaced by constant nodes.
/// </summary>
/// <remarks>
/// The input <paramref name="tree"/> is cloned and a classification model for
/// <c>problemData.TargetVariable</c> is created around the clone. The nodes under
/// <c>Root.GetSubtree(0).GetSubtree(0)</c> are listed once in prefix order and processed in that order;
/// existing <c>ConstantTreeNode</c> instances are left alone.
/// </remarks>
/// <param name="tree">The tree to prune; only its clone is modified by this method.</param>
/// <param name="modelCreator">Creates the classification model for the cloned tree.</param>
/// <param name="impactValuesCalculator">Computes impact value, replacement value and new quality per node.</param>
/// <param name="interpreter">Interpreter passed on to the model creator.</param>
/// <param name="problemData">Supplies the target variable and is passed on to the impact calculator.</param>
/// <param name="estimationLimits">Its <c>Lower</c> and <c>Upper</c> values are passed on to the model creator.</param>
/// <param name="rows">Row indices passed on to the impact calculator.</param>
/// <param name="nodeImpactThreshold">In threshold mode, a node is kept only if its impact is greater than this value.</param>
/// <param name="pruneOnlyZeroImpactNodes">When true, a node is replaced only if its impact <c>IsAlmost(0.0)</c>; the threshold is then not consulted.</param>
/// <returns>The <c>SymbolicExpressionTree</c> of the model built for the clone.</returns>
public static ISymbolicExpressionTree Prune(ISymbolicExpressionTree tree, ISymbolicClassificationModelCreator modelCreator, SymbolicClassificationSolutionImpactValuesCalculator impactValuesCalculator, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IClassificationProblemData problemData, DoubleLimit estimationLimits, IEnumerable<int> rows, double nodeImpactThreshold = 0.0, bool pruneOnlyZeroImpactNodes = false) {
  var prunedTree = (ISymbolicExpressionTree)tree.Clone();
  var model = modelCreator.CreateSymbolicClassificationModel(problemData.TargetVariable, prunedTree, interpreter, estimationLimits.Lower, estimationLimits.Upper);

  // Materialized up front, so replacing nodes in the tree does not disturb the traversal order.
  var prefixNodes = prunedTree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix().ToList();

  // NaN until the first replacement; afterwards the quality reported by the calculator for the last replacement.
  double lastQuality = double.NaN;

  int cursor = 0;
  while (cursor < prefixNodes.Count) {
    var subject = prefixNodes[cursor];
    int step = 1;

    if (!(subject is ConstantTreeNode)) {
      double nodeImpact, constantValue, qualityAfterFold;
      impactValuesCalculator.CalculateImpactAndReplacementValues(model, subject, problemData, rows, out nodeImpact, out constantValue, out qualityAfterFold, lastQuality);

      bool keepNode;
      if (pruneOnlyZeroImpactNodes) {
        keepNode = !nodeImpact.IsAlmost(0.0);
      } else {
        keepNode = nodeImpact > nodeImpactThreshold;
      }

      if (!keepNode) {
        var foldedNode = (ConstantTreeNode)subject.Grammar.GetSymbol("Constant").CreateTreeNode();
        foldedNode.Value = constantValue;
        ReplaceWithConstant(subject, foldedNode);

        // Jump over the folded node together with the subtrees under it.
        step = subject.GetLength();
        lastQuality = qualityAfterFold;
      }
    }

    cursor += step;
  }

  return model.SymbolicExpressionTree;
}