/// <summary>
/// Evaluates a symbolic classification tree as a weighted sum of the normalized mean squared error
/// (NMSE), the false negative rate (FNR) and the false positive rate (FPR) on the given rows.
/// Returns <c>normalizedMeanSquaredErrorWeightingFactor * NMSE + falseNegativeRateWeightingFactor * FNR
/// + falsePositiveRateWeightingFactor * FPR</c>, or <c>double.NaN</c> if any online calculator
/// reports an error state.
/// </summary>
/// <param name="interpreter">Interpreter used to evaluate the tree on the dataset.</param>
/// <param name="tree">The symbolic expression tree to evaluate.</param>
/// <param name="lowerEstimationLimit">Lower bound applied to the raw tree output.</param>
/// <param name="upperEstimationLimit">Upper bound applied to the raw tree output.</param>
/// <param name="problemData">Classification problem data (dataset, target variable, positive class).</param>
/// <param name="rows">Row indices to evaluate.</param>
/// <param name="applyLinearScaling">Must be false; linear scaling is not supported by this evaluator.</param>
/// <param name="modelCreator">Factory that turns the tree into a classification model.</param>
/// <param name="normalizedMeanSquaredErrorWeightingFactor">Weight of the NMSE term.</param>
/// <param name="falseNegativeRateWeightingFactor">Weight of the FNR term.</param>
/// <param name="falsePositiveRateWeightingFactor">Weight of the FPR term.</param>
/// <exception cref="NotSupportedException">Thrown when <paramref name="applyLinearScaling"/> is true.</exception>
public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, ISymbolicClassificationModelCreator modelCreator, double normalizedMeanSquaredErrorWeightingFactor, double falseNegativeRateWeightingFactor, double falsePositiveRateWeightingFactor) {
  // Fail fast: the original code performed the full (expensive) tree interpretation, model
  // creation and performance calculation before rejecting this unsupported configuration.
  if (applyLinearScaling) {
    throw new NotSupportedException("The Weighted Performance Measures Evaluator does not suppport linear scaling!");
  }

  var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, rows);
  var targetClassValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
  var boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit).ToArray();

  // Build a classification model from the tree. Discriminant-function models get their class
  // thresholds recalculated from the bounded estimates; every other model type recalculates
  // its parameters directly on the problem data.
  string positiveClassName = problemData.PositiveClass;
  IEnumerable<double> estimatedClassValues;
  var model = modelCreator.CreateSymbolicClassificationModel(problemData.TargetVariable, tree, interpreter, lowerEstimationLimit, upperEstimationLimit);
  var m = model as ISymbolicDiscriminantFunctionClassificationModel;
  if (m != null) {
    double[] classValues, thresholds;
    m.ThresholdCalculator.Calculate(problemData, boundedEstimatedValues, targetClassValues, out classValues, out thresholds);
    m.SetThresholdsAndClassValues(thresholds, classValues);
    estimatedClassValues = m.GetEstimatedClassValues(boundedEstimatedValues);
  } else {
    model.RecalculateModelParameters(problemData, rows);
    estimatedClassValues = model.GetEstimatedClassValues(problemData.Dataset, rows);
  }

  // Classification performance measures (relative to the configured positive class).
  var performanceCalculator = new ClassificationPerformanceMeasuresCalculator(positiveClassName, problemData.GetClassValue(positiveClassName));
  performanceCalculator.Calculate(targetClassValues, estimatedClassValues);
  if (performanceCalculator.ErrorState != OnlineCalculatorError.None) {
    return double.NaN;
  }
  double falseNegativeRate = 1 - performanceCalculator.TruePositiveRate; // FNR = 1 - sensitivity
  double falsePositiveRate = performanceCalculator.FalsePositiveRate;

  // Regression-style NMSE on the raw (bounded) discriminant values vs. the class values.
  OnlineCalculatorError errorState;
  double nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(targetClassValues, boundedEstimatedValues, out errorState);
  if (errorState != OnlineCalculatorError.None) {
    return double.NaN;
  }

  return normalizedMeanSquaredErrorWeightingFactor * nmse
       + falseNegativeRateWeightingFactor * falseNegativeRate
       + falsePositiveRateWeightingFactor * falsePositiveRate;
}
/// <summary>
/// Elastic net with squared-error-loss for dense predictor matrix, runs the full path of all lambdas
/// </summary>
/// <param name="problemData">Predictor target matrix x and target vector y</param>
/// <param name="penalty">Penalty for balance between ridge (0.0) and lasso (1.0) regression</param>
/// <param name="nlam">Maximum number of lambda values (default 100)</param>
/// <param name="flmin">User control of lambda values (&lt;1.0 =&gt; minimum lambda = flmin * (largest lambda value), &gt;= 1.0 =&gt; use supplied lambda values)</param>
/// <param name="ulam">User supplied lambda values</param>
/// <param name="lambda">Output lambda values</param>
/// <param name="trainNMSE">Vector of normalized mean of squared error (NMSE = Variance(res) / Variance(y)) values on the training set for each set of coefficients along the path</param>
/// <param name="testNMSE">Vector of normalized mean of squared error (NMSE = Variance(res) / Variance(y)) values on the test set for each set of coefficients along the path</param>
/// <param name="coeff">Vector of coefficient vectors for each solution along the path</param>
/// <param name="intercept">Vector of intercepts for each solution along the path</param>
/// <param name="coeffLowerBound">Optional lower bound for all coefficients</param>
/// <param name="coeffUpperBound">Optional upper bound for all coefficients</param>
/// <param name="maxVars">Maximum allowed number of variables in each solution along the path (-1 => all variables are allowed)</param>
private static void RunElasticNetLinearRegression(IRegressionProblemData problemData, double penalty, int nlam, double flmin, double[] ulam, out double[] lambda, out double[] trainNMSE, out double[] testNMSE, out double[,] coeff, out double[] intercept, double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity, int maxVars = -1 ) {
  if (penalty < 0.0 || penalty > 1.0) { throw new ArgumentException("0 <= penalty <= 1", "penalty"); }
  // Split the problem data into glmnet-style (variables x observations) train/test matrices.
  double[,] trainX; double[,] testX;
  double[] trainY; double[] testY;
  PrepareData(problemData, out trainX, out trainY, out testX, out testY);
  var numTrainObs = trainX.GetLength(1); // observations are the second dimension (Fortran layout)
  var numTestObs = testX.GetLength(1);
  var numVars = trainX.GetLength(0);

  // --- inputs for the glmnet elnet routine ---
  int ka = 1; // => covariance updating algorithm
  double parm = penalty;
  double[] w = Enumerable.Repeat(1.0, numTrainObs).ToArray(); // all observations have the same weight
  int[] jd = new int[1]; // do not force to use any of the variables
  // NOTE(review): vp holds the relative penalty factors per predictor; 1.0 means the standard
  // penalty is applied to every variable (a value of 0 would make a variable unpenalized).
  double[] vp = Enumerable.Repeat(1.0, numVars).ToArray();
  double[,] cl = new double[numVars, 2]; // use the same bounds for all coefficients
  for (int i = 0; i < numVars; i++) {
    cl[i, 0] = coeffLowerBound;
    cl[i, 1] = coeffUpperBound;
  }
  int ne = maxVars > 0 ? maxVars : numVars; // max number of variables allowed in a solution
  int nx = numVars; // max number of variables ever nonzero (size of the compressed coeff array)
  double thr = 1.0e-5; // default value as recommended in glmnet
  int isd = 1; // => regression on standardized predictor variables
  int intr = 1; // => do include intercept in model
  int maxit = 100000; // default value as recommended in glmnet

  // --- outputs of elnet (initialized to sentinel values before the native call) ---
  int lmu = -1; // actual number of lambda values produced on the path
  double[,] ca; // compressed coefficients per solution
  int[] ia; // pointers from compressed slots to variable indices
  int[] nin; // number of nonzero coefficients per solution
  int nlp = -99; // number of passes
  int jerr = -99; // error flag
  double[] trainR2;
  Glmnet.elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainR2, out lambda, out nlp, out jerr);

  trainNMSE = new double[lmu]; // elnet returns R**2 as 1 - NMSE
  testNMSE = new double[lmu];
  coeff = new double[lmu, numVars];
  for (int solIdx = 0; solIdx < lmu; solIdx++) {
    trainNMSE[solIdx] = 1.0 - trainR2[solIdx];

    // uncompress coefficients of solution
    int selectedNin = nin[solIdx];
    double[] coefficients;
    double[] selectedCa = new double[nx];
    for (int i = 0; i < nx; i++) {
      selectedCa[i] = ca[solIdx, i];
    }

    // apply to test set to calculate test NMSE values for each lambda step
    double[] fn;
    Glmnet.modval(intercept[solIdx], selectedCa, ia, selectedNin, numTestObs, testX, out fn);
    OnlineCalculatorError error;
    var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(testY, fn, out error);
    if (error != OnlineCalculatorError.None) { nmse = double.NaN; } // best effort: keep the path entry, mark it invalid
    testNMSE[solIdx] = nmse;

    // uncompress coefficients
    Glmnet.uncomp(numVars, selectedCa, ia, selectedNin, out coefficients);
    for (int i = 0; i < coefficients.Length; i++) {
      coeff[solIdx, i] = coefficients[i];
    }
  }
}
/// <summary>
/// After the base analyzer has run, (re)builds the "Pareto Front Analysis" scatter plot:
/// quality (training NMSE) over tree size for the size-filtered pareto front of the current
/// training-best solutions, optionally with test and validation NMSE rows.
/// </summary>
public override IOperation Apply() {
  var next = base.Apply();
  var paretoFront = TrainingBestSolutionsParameter.ActualValue;

  // Fetch the existing plot from the result collection, or create and register a new one.
  ScatterPlot plot;
  IResult existing;
  if (ResultCollection.TryGetValue("Pareto Front Analysis", out existing)) {
    plot = (ScatterPlot)existing.Value;
  } else {
    plot = new ScatterPlot("Quality vs Tree Size", "");
    var props = plot.VisualProperties;
    props.XAxisMinimumAuto = false;
    props.XAxisMaximumAuto = false;
    props.YAxisMinimumAuto = false;
    props.YAxisMaximumAuto = false;
    props.XAxisMinimumFixedValue = 0;
    props.XAxisMaximumFixedValue = MaximumSymbolicExpressionTreeLengthParameter.ActualValue.Value;
    props.YAxisMinimumFixedValue = 0;
    props.YAxisMaximumFixedValue = 2;
    ResultCollection.Add(new Result("Pareto Front Analysis", plot));
  }

  // Walk the front ordered by tree length and keep only the best (lowest training NMSE)
  // solution per tree length, so quality decreases monotonically along the size axis.
  var sizeFront = new LinkedList<ISymbolicRegressionSolution>();
  int lastLength = -1;
  foreach (var candidate in paretoFront.OrderBy(s => s.Model.SymbolicExpressionTree.Length)) {
    int length = candidate.Model.SymbolicExpressionTree.Length;
    if (sizeFront.Count == 0) {
      sizeFront.AddLast(candidate);
    }
    if (candidate.TrainingNormalizedMeanSquaredError < sizeFront.Last.Value.TrainingNormalizedMeanSquaredError) {
      // A better solution of the same size replaces the previous one.
      if (length == lastLength) {
        sizeFront.RemoveLast();
      }
      sizeFront.AddLast(candidate);
    }
    lastLength = length;
  }

  // Rebuild the plot rows from scratch on every application.
  plot.Rows.Clear();

  var trainingRow = new ScatterPlotDataRow("Training NMSE", "",
    sizeFront.Select(s => new Point2D<double>(s.Model.SymbolicExpressionTree.Length, s.TrainingNormalizedMeanSquaredError, s)));
  trainingRow.VisualProperties.PointSize = 8;
  plot.Rows.Add(trainingRow);

  if (AnalyzeTestError) {
    var testRow = new ScatterPlotDataRow("Test NMSE", "",
      sizeFront.Select(s => new Point2D<double>(s.Model.SymbolicExpressionTree.Length, s.TestNormalizedMeanSquaredError, s)));
    testRow.VisualProperties.PointSize = 8;
    plot.Rows.Add(testRow);
  }

  var validationPartition = ValidationPartitionParameter.ActualValue;
  if (validationPartition.Size != 0) {
    var problemData = ProblemDataParameter.ActualValue;
    var validationIndizes = Enumerable.Range(validationPartition.Start, validationPartition.Size).ToList();
    var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, validationIndizes).ToList();
    OnlineCalculatorError error; // captured by the lambda below; NaN results are not filtered out
    var validationRow = new ScatterPlotDataRow("Validation NMSE", "",
      sizeFront.Select(s => new Point2D<double>(s.Model.SymbolicExpressionTree.Length,
        OnlineNormalizedMeanSquaredErrorCalculator.Calculate(targetValues, s.GetEstimatedValues(validationIndizes), out error))));
    validationRow.VisualProperties.PointSize = 7;
    plot.Rows.Add(validationRow);
  }

  return next;
}