Exemplo n.º 1
0
        // Evaluates the mean squared error of a symbolic expression tree on the given rows.
        //   interpreter / solution : produce the estimated values for the rows
        //   lowerEstimationLimit / upperEstimationLimit : bounds handed to LimitToRange or CalculateWithScaling
        //   problemData : supplies the dataset and the target variable name
        //   applyLinearScaling : when true the evaluation is delegated to CalculateWithScaling
        // Returns the MSE, or Double.NaN when the calculator reports any error state.
        public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling)
        {
            IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
            IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

            OnlineCalculatorError calcError;
            double quality;

            if (!applyLinearScaling)
            {
                // unscaled path: bound the raw estimates and compare them with the targets directly
                quality = OnlineMeanSquaredErrorCalculator.Calculate(targets, predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit), out calcError);
            }
            else
            {
                // scaled path: the shared helper feeds the calculator instance
                var scaledCalculator = new OnlineMeanSquaredErrorCalculator();
                CalculateWithScaling(targets, predictions, lowerEstimationLimit, upperEstimationLimit, scaledCalculator, problemData.Dataset.Rows);
                calcError = scaledCalculator.ErrorState;
                quality = scaledCalculator.MeanSquaredError;
            }

            return calcError == OnlineCalculatorError.None ? quality : Double.NaN;
        }
    // Evaluates the mean squared error of a time-series prognosis tree.
    // For every start row the forecast horizon is capped at (evaluationPartition.End - row).
    // Three evaluation modes:
    //   - no linear scaling:           bounded estimates are compared with the targets directly
    //   - linear scaling, horizon == 1: delegated to CalculateWithScaling
    //   - linear scaling, horizon != 1: a cloned model is scaled first and its tree is re-evaluated
    // Returns the MSE, or Double.NaN when the calculator reports any error state.
    public static double Calculate(ISymbolicTimeSeriesPrognosisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, ITimeSeriesPrognosisProblemData problemData, IEnumerable<int> rows, IntRange evaluationPartition, int horizon, bool applyLinearScaling) {
      var cappedHorizons = rows.Select(row => Math.Min(horizon, evaluationPartition.End - row));
      // expand each (startRow, horizon) pair into the consecutive row indices it covers
      var targetRows = rows.Zip(cappedHorizons, Enumerable.Range).SelectMany(range => range);
      IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, targetRows);
      IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows, cappedHorizons).SelectMany(perRow => perRow);

      OnlineCalculatorError calcError;
      double quality;

      if (!applyLinearScaling) {
        var bounded = predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
        quality = OnlineMeanSquaredErrorCalculator.Calculate(targets, bounded, out calcError);
      } else if (horizon != 1) {
        // scale a clone of the solution through a model, then evaluate the scaled tree
        var prognosisModel = new SymbolicTimeSeriesPrognosisModel((ISymbolicExpressionTree)solution.Clone(), interpreter, lowerEstimationLimit, upperEstimationLimit);
        prognosisModel.Scale(problemData);
        var scaledTree = prognosisModel.SymbolicExpressionTree;
        IEnumerable<double> scaledPredictions = interpreter.GetSymbolicExpressionTreeValues(scaledTree, problemData.Dataset, rows, cappedHorizons).SelectMany(perRow => perRow);
        var bounded = scaledPredictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
        quality = OnlineMeanSquaredErrorCalculator.Calculate(targets, bounded, out calcError);
      } else {
        // horizon == 1: evaluate normally and let the helper scale while accumulating the error
        var scalingCalculator = new OnlineMeanSquaredErrorCalculator();
        CalculateWithScaling(targets, predictions, lowerEstimationLimit, upperEstimationLimit, scalingCalculator, problemData.Dataset.Rows * horizon);
        calcError = scalingCalculator.ErrorState;
        quality = scalingCalculator.MeanSquaredError;
      }

      return calcError == OnlineCalculatorError.None ? quality : Double.NaN;
    }
Exemplo n.º 3
0
        // Calculates variable relevance based on removal of variables:
        //  1) to remove a variable we set its length scale to double.MaxValue (no relation of the variable value to the target)
        //  2) calculate the MSE of the original target values (y) to the updated targets y' (after variable removal)
        //  3) relevance is larger if MSE(y,y') is large
        //  4) scale impacts so that the most important variable has impact = 1
        //
        // Parameters:
        //   y  - original target values
        //   u  - vector multiplied with the changed covariance matrix to obtain y'
        //   xs - one list of values per input variable; the first list determines the row count
        //   l  - length scales, one entry per variable
        // Returns one relevance value per entry of l. An empty l yields an empty array, and if
        // no variable has a positive MSE the unscaled values are returned (instead of 0/0 = NaN).
        private double[] CalculateRelevance(double[] y, double[] u, List<double>[] xs, double[] l)
        {
            var relevance = new double[l.Length];
            if (l.Length == 0)
            {
                // nothing to rank; also avoids Max() throwing on an empty sequence below
                return relevance;
            }

            int nRows    = xs.First().Count;
            var changedL = new double[l.Length];

            for (int i = 0; i < l.Length; i++)
            {
                // start from the original length scales and "remove" only variable i
                Array.Copy(l, changedL, changedL.Length);
                changedL[i] = double.MaxValue;
                var changedK = CalculateCovariance(xs, changedL);

                // y' is obtained via alglib's rmatrixmv on changedK and u (presumably the product changedK * u - confirm)
                var yChanged = new double[u.Length];
                alglib.ablas.rmatrixmv(nRows, nRows, changedK, 0, 0, 0, u, 0, ref yChanged, 0);

                OnlineCalculatorError error;
                var mse = OnlineMeanSquaredErrorCalculator.Calculate(y, yChanged, out error);
                if (error != OnlineCalculatorError.None)
                {
                    // a failed calculation is reported as the largest possible value
                    mse = double.MaxValue;
                }
                relevance[i] = mse;
            }

            // scale so that max relevance is 1.0; skip scaling when the maximum is zero (or NaN)
            // because dividing by it would turn every entry into NaN
            var maxRel = relevance.Max();
            if (maxRel > 0.0)
            {
                for (int i = 0; i < relevance.Length; i++)
                {
                    relevance[i] /= maxRel;
                }
            }
            return relevance;
        }
Exemplo n.º 4
0
        // Calculates variable relevance based on removal of variables:
        //  1) to remove a variable we set its coefficient to zero
        //  2) calculate the MSE of the original target values (y) to the updated targets y' (after variable removal)
        //  3) relevance is larger if MSE(y,y') is large
        //  4) scale impacts so that the most important variable has impact = 1
        //
        // Parameters:
        //   y  - original target values
        //   xs - one list of values per input variable, passed on to EvaluteLinearModel
        //   l  - model coefficients, one entry per variable
        // Returns one relevance value per entry of l. An empty l yields an empty array, and if
        // no variable has a positive MSE the unscaled values are returned (instead of 0/0 = NaN).
        private double[] CalculateRelevance(double[] y, List<double>[] xs, double[] l)
        {
            var relevance = new double[l.Length];
            if (l.Length == 0)
            {
                // nothing to rank; also avoids Max() throwing on an empty sequence below
                return relevance;
            }

            var changedL = new double[l.Length];

            for (int i = 0; i < l.Length; i++)
            {
                // start from the original coefficients and "remove" only variable i
                Array.Copy(l, changedL, changedL.Length);
                changedL[i] = 0.0;

                var yChanged = EvaluteLinearModel(xs, changedL);

                OnlineCalculatorError error;
                var mse = OnlineMeanSquaredErrorCalculator.Calculate(y, yChanged, out error);
                if (error != OnlineCalculatorError.None)
                {
                    // a failed calculation is reported as the largest possible value
                    mse = double.MaxValue;
                }
                relevance[i] = mse;
            }

            // scale so that max relevance is 1.0; skip scaling when the maximum is zero (or NaN)
            // because dividing by it would turn every entry into NaN
            var maxRel = relevance.Max();
            if (maxRel > 0.0)
            {
                for (int i = 0; i < relevance.Length; i++)
                {
                    relevance[i] /= maxRel;
                }
            }
            return relevance;
        }
Exemplo n.º 5
0
        // Evaluates a solution on two objectives and returns them as { MSE, tree length }.
        // The estimates are passed through LimitToRange before the MSE is computed; the MSE
        // entry is double.NaN when the calculator reports any error state.
        public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows)
        {
            IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
            IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
            IEnumerable<double> boundedPredictions = predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);

            OnlineCalculatorError calcError;
            double quality = OnlineMeanSquaredErrorCalculator.Calculate(targets, boundedPredictions, out calcError);

            var objectives = new double[2];
            // first objective: error (NaN marks a failed calculation)
            objectives[0] = calcError == OnlineCalculatorError.None ? quality : double.NaN;
            // second objective: size of the expression tree
            objectives[1] = solution.Length;
            return objectives;
        }
    // Computes the mean squared error of the solution's estimates against the target variable.
    // With applyLinearScaling the work is delegated to CalculateWithScaling; otherwise the
    // estimates are passed through LimitToRange and compared directly.
    // Returns Double.NaN whenever the calculator ends in an error state.
    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
      IEnumerable<double> modelOutput = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
      IEnumerable<double> expected = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

      OnlineCalculatorError status;
      double result;
      if (!applyLinearScaling) {
        IEnumerable<double> clipped = modelOutput.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
        result = OnlineMeanSquaredErrorCalculator.Calculate(expected, clipped, out status);
      } else {
        var calculator = new OnlineMeanSquaredErrorCalculator();
        CalculateWithScaling(expected, modelOutput, lowerEstimationLimit, upperEstimationLimit, calculator, problemData.Dataset.Rows);
        status = calculator.ErrorState;
        result = calculator.MeanSquaredError;
      }

      if (status == OnlineCalculatorError.None) {
        return result;
      }
      return Double.NaN;
    }
        // Trains one SVM per (training, test) partition pair and returns the test MSE averaged
        // over all partitions. A partition whose calculator ends in an error state contributes
        // double.NaN to the sum.
        private static double CalculateCrossValidationPartitions(Tuple<svm_problem, svm_problem>[] partitions, svm_parameter parameters)
        {
            var mseCalculator = new OnlineMeanSquaredErrorCalculator();
            double mseSum = 0;

            for (int p = 0; p < partitions.Length; p++)
            {
                svm_problem trainingSet = partitions[p].Item1;
                svm_problem testSet = partitions[p].Item2;
                var trainedModel = svm.svm_train(trainingSet, parameters);

                // the calculator instance is reused, so clear the state left by the previous fold
                mseCalculator.Reset();
                int sample = 0;
                while (sample < testSet.l)
                {
                    mseCalculator.Add(testSet.y[sample], svm.svm_predict(trainedModel, testSet.x[sample]));
                    sample++;
                }

                if (mseCalculator.ErrorState == OnlineCalculatorError.None)
                {
                    mseSum += mseCalculator.MeanSquaredError;
                }
                else
                {
                    mseSum += double.NaN;
                }
            }

            return mseSum / partitions.Length;
        }
        // Computes the mean squared error of a time-series prognosis solution.
        // Each start row is forecast for min(horizon, evaluationPartition.End - row) steps.
        //   - applyLinearScaling && horizon == 1: evaluate normally and delegate to CalculateWithScaling
        //   - applyLinearScaling otherwise:       scale a cloned model first, then evaluate its tree
        //   - no scaling:                         compare the bounded estimates with the targets directly
        // Returns Double.NaN when the calculator reports any error state.
        public static double Calculate(ISymbolicTimeSeriesPrognosisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, ITimeSeriesPrognosisProblemData problemData, IEnumerable<int> rows, IntRange evaluationPartition, int horizon, bool applyLinearScaling)
        {
            var stepsPerRow = rows.Select(start => Math.Min(horizon, evaluationPartition.End - start));
            // every (start, steps) pair expands to the row indices start .. start + steps - 1
            var forecastRows = rows.Zip(stepsPerRow, Enumerable.Range).SelectMany(indices => indices);
            IEnumerable<double> expected = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, forecastRows);
            IEnumerable<double> forecast = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows, stepsPerRow).SelectMany(series => series);

            OnlineCalculatorError status;
            double result;

            bool scaleDirectly = applyLinearScaling && horizon == 1;
            if (scaleDirectly)
            {
                var calculator = new OnlineMeanSquaredErrorCalculator();
                CalculateWithScaling(expected, forecast, lowerEstimationLimit, upperEstimationLimit, calculator, problemData.Dataset.Rows * horizon);
                status = calculator.ErrorState;
                result = calculator.MeanSquaredError;
            }
            else
            {
                if (applyLinearScaling)
                {
                    // build a model around a clone so the original solution is left untouched by Scale
                    var scaledModel = new SymbolicTimeSeriesPrognosisModel(problemData.TargetVariable, (ISymbolicExpressionTree)solution.Clone(), interpreter, lowerEstimationLimit, upperEstimationLimit);
                    scaledModel.Scale(problemData);
                    var scaledTree = scaledModel.SymbolicExpressionTree;
                    forecast = interpreter.GetSymbolicExpressionTreeValues(scaledTree, problemData.Dataset, rows, stepsPerRow).SelectMany(series => series);
                }

                var clipped = forecast.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
                result = OnlineMeanSquaredErrorCalculator.Calculate(expected, clipped, out status);
            }

            return status != OnlineCalculatorError.None ? Double.NaN : result;
        }
        // Trains a random forest regression model on the training rows of every partition and
        // writes the test MSE averaged over all partitions to avgTestMse. A partition whose MSE
        // calculation reports an error contributes double.NaN.
        private static void CrossValidate(IRegressionProblemData problemData, Tuple<IEnumerable<int>, IEnumerable<int>>[] partitions, int nTrees, double r, double m, int seed, out double avgTestMse)
        {
            avgTestMse = 0;
            var dataset = problemData.Dataset;
            var targetName = GetTargetVariableName(problemData);

            for (int p = 0; p < partitions.Length; p++)
            {
                var trainingRows = partitions[p].Item1;
                var testRows = partitions[p].Item2;

                // the training statistics are required out-arguments of the factory but are not used here
                double rmsError, avgRelError, outOfBagAvgRelError, outOfBagRmsError;
                var forest = RandomForestModel.CreateRegressionModel(problemData, trainingRows, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);

                var predictions = forest.GetEstimatedValues(dataset, testRows);
                var targets = dataset.GetDoubleValues(targetName, testRows);

                OnlineCalculatorError status;
                double foldMse = OnlineMeanSquaredErrorCalculator.Calculate(predictions, targets, out status);
                avgTestMse += status == OnlineCalculatorError.None ? foldMse : double.NaN;
            }

            avgTestMse /= partitions.Length;
        }
        // Trains a support vector regression model on the problem data and publishes the results:
        // optionally the solution itself, the number of support vectors, and MSE / MAE / average
        // relative error for the training and the test partition. A metric whose calculator reports
        // an error is stored as double.MaxValue.
        protected override void Run()
        {
            IRegressionProblemData problemData            = Problem.ProblemData;
            IEnumerable<string>    selectedInputVariables = problemData.AllowedInputVariables;
            int nSv;
            ISupportVectorMachineModel model;

            Run(problemData, selectedInputVariables, SvmType.Value, KernelType.Value, Cost.Value, Nu.Value, Gamma.Value, Epsilon.Value, Degree.Value, out model, out nSv);

            if (CreateSolution)
            {
                var solution = new SupportVectorRegressionSolution((SupportVectorMachineModel)model, (IRegressionProblemData)problemData.Clone());
                Results.Add(new Result("Support vector regression solution", "The support vector regression solution.", solution));
            }

            Results.Add(new Result("Number of support vectors", "The number of support vectors of the SVR solution.", new IntValue(nSv)));


            {
                // calculate regression model metrics
                var ds        = problemData.Dataset;
                var trainRows = problemData.TrainingIndices;
                var testRows  = problemData.TestIndices;
                // materialize targets and predictions once; each sequence feeds three calculators below
                var yTrain     = ds.GetDoubleValues(problemData.TargetVariable, trainRows).ToArray();
                var yTest      = ds.GetDoubleValues(problemData.TargetVariable, testRows).ToArray();
                var yPredTrain = model.GetEstimatedValues(ds, trainRows).ToArray();
                var yPredTest  = model.GetEstimatedValues(ds, testRows).ToArray();

                // Targets are passed first and predictions second, matching the (target, estimate)
                // order used by the other calculator call sites in this file.
                // NOTE(review): the percentage error is presumably normalized by the first argument,
                // so the order matters for it - confirm against the calculator's signature.
                OnlineCalculatorError error;
                var trainMse = OnlineMeanSquaredErrorCalculator.Calculate(yTrain, yPredTrain, out error);
                if (error != OnlineCalculatorError.None)
                {
                    trainMse = double.MaxValue;
                }
                var testMse = OnlineMeanSquaredErrorCalculator.Calculate(yTest, yPredTest, out error);
                if (error != OnlineCalculatorError.None)
                {
                    testMse = double.MaxValue;
                }

                Results.Add(new Result("Mean squared error (training)", "The mean of squared errors of the SVR solution on the training partition.", new DoubleValue(trainMse)));
                Results.Add(new Result("Mean squared error (test)", "The mean of squared errors of the SVR solution on the test partition.", new DoubleValue(testMse)));


                var trainMae = OnlineMeanAbsoluteErrorCalculator.Calculate(yTrain, yPredTrain, out error);
                if (error != OnlineCalculatorError.None)
                {
                    trainMae = double.MaxValue;
                }
                var testMae = OnlineMeanAbsoluteErrorCalculator.Calculate(yTest, yPredTest, out error);
                if (error != OnlineCalculatorError.None)
                {
                    testMae = double.MaxValue;
                }

                Results.Add(new Result("Mean absolute error (training)", "The mean of absolute errors of the SVR solution on the training partition.", new DoubleValue(trainMae)));
                Results.Add(new Result("Mean absolute error (test)", "The mean of absolute errors of the SVR solution on the test partition.", new DoubleValue(testMae)));


                var trainRelErr = OnlineMeanAbsolutePercentageErrorCalculator.Calculate(yTrain, yPredTrain, out error);
                if (error != OnlineCalculatorError.None)
                {
                    trainRelErr = double.MaxValue;
                }
                var testRelErr = OnlineMeanAbsolutePercentageErrorCalculator.Calculate(yTest, yPredTest, out error);
                if (error != OnlineCalculatorError.None)
                {
                    testRelErr = double.MaxValue;
                }

                Results.Add(new Result("Average relative error (training)", "The mean of relative errors of the SVR solution on the training partition.", new DoubleValue(trainRelErr)));
                Results.Add(new Result("Average relative error (test)", "The mean of relative errors of the SVR solution on the test partition.", new DoubleValue(testRelErr)));
            }
        }