/// <summary>
/// Runs the boosting loop. In every iteration the configured regression algorithm is executed on a
/// copy of the dataset whose target column holds the current residuals; when a model is produced, its
/// predictions are scaled by the shrinkage factor and accumulated for the training and test rows.
/// Progress is published through the "Iterations", "Qualities", "R² (train)" and "R² (test)" results.
/// </summary>
/// <param name="cancellationToken">Checked at the start of each iteration; a requested cancellation aborts the run by throwing.</param>
/// <exception cref="NotSupportedException">Thrown when the sampled problem data cannot be set on the regression algorithm.</exception>
/// <exception cref="OperationCanceledException">Thrown when cancellation was requested.</exception>
protected override void Run(CancellationToken cancellationToken) {
      // Set up the algorithm
      if (SetSeedRandomly) Seed = new System.Random().Next();
      var rand = new MersenneTwister((uint)Seed);

      // Set up the results display
      var iterations = new IntValue(0);
      Results.Add(new Result("Iterations", iterations));

      var table = new DataTable("Qualities");
      table.Rows.Add(new DataRow("R² (train)"));
      table.Rows.Add(new DataRow("R² (test)"));
      Results.Add(new Result("Qualities", table));
      var curLoss = new DoubleValue();
      var curTestLoss = new DoubleValue();
      Results.Add(new Result("R² (train)", curLoss));
      Results.Add(new Result("R² (test)", curTestLoss));
      var runCollection = new RunCollection();
      if (StoreRuns)
        Results.Add(new Result("Runs", runCollection));

      // init
      var problemData = Problem.ProblemData;
      var targetVarName = problemData.TargetVariable;
      var activeVariables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
      // working copy of the data; its target column is replaced by the residuals in every iteration
      var modifiableDataset = new ModifiableDataset(
        activeVariables,
        activeVariables.Select(v => problemData.Dataset.GetDoubleValues(v).ToList()));

      var trainingRows = problemData.TrainingIndices;
      var testRows = problemData.TestIndices;
      // accumulated (shrunken) ensemble predictions, initially all zero
      var yPred = new double[trainingRows.Count()];
      var yPredTest = new double[testRows.Count()];
      // y / yTest: original targets; curY / curYTest: current residuals (start out equal to the targets)
      var y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
      var curY = y.ToArray();

      var yTest = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices).ToArray();
      var curYTest = yTest.ToArray();
      var nu = Nu;
      // number of input variables / training rows handed to the base learner per iteration
      var mVars = (int)Math.Ceiling(M * problemData.AllowedInputVariables.Count());
      var rRows = (int)Math.Ceiling(R * problemData.TrainingIndices.Count());
      var alg = RegressionAlgorithm;
      List<IRegressionModel> models = new List<IRegressionModel>();
      try {

        // Loop until iteration limit reached or canceled.
        for (int i = 0; i < Iterations; i++) {
          cancellationToken.ThrowIfCancellationRequested();

          // replace the target column with the current residuals (training values followed by test values)
          modifiableDataset.RemoveVariable(targetVarName);
          modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest));

          // NOTE(review): presumably writes a random sample of rRows training rows into the first rows of
          // modifiableDataset (the training partition below is [0, rRows)) - verify against SampleTrainingData
          SampleTrainingData(rand, modifiableDataset, rRows, problemData.Dataset, curY, problemData.TargetVariable, problemData.TrainingIndices); // all training indices from the original problem data are allowed 
          var modifiableProblemData = new RegressionProblemData(modifiableDataset,
            problemData.AllowedInputVariables.SampleRandomWithoutRepetition(rand, mVars),
            problemData.TargetVariable);
          modifiableProblemData.TrainingPartition.Start = 0;
          modifiableProblemData.TrainingPartition.End = rRows;
          modifiableProblemData.TestPartition.Start = problemData.TestPartition.Start;
          modifiableProblemData.TestPartition.End = problemData.TestPartition.End;

          if (!TrySetProblemData(alg, modifiableProblemData))
            throw new NotSupportedException("The algorithm cannot be used with GBM.");

          IRegressionModel model;
          IRun run;

          // try to find a model. The algorithm might fail to produce a model. In this case we just retry until the iterations are exhausted
          if (TryExecute(alg, rand.Next(), RegressionAlgorithmResult, out model, out run)) {
            int row = 0;
            // update predictions for training and test
            // update new targets (in the case of squared error loss we simply use negative residuals)
            foreach (var pred in model.GetEstimatedValues(problemData.Dataset, trainingRows)) {
              yPred[row] = yPred[row] + nu * pred;
              curY[row] = y[row] - yPred[row];
              row++;
            }
            row = 0;
            foreach (var pred in model.GetEstimatedValues(problemData.Dataset, testRows)) {
              yPredTest[row] = yPredTest[row] + nu * pred;
              curYTest[row] = yTest[row] - yPredTest[row];
              row++;
            }
            // determine quality
            // each calculation gets its own status variable so that the test-set result
            // cannot overwrite (and thereby mask or fake) the training-set result
            OnlineCalculatorError trainError;
            OnlineCalculatorError testError;
            var trainR = OnlinePearsonsRCalculator.Calculate(yPred, y, out trainError);
            var testR = OnlinePearsonsRCalculator.Calculate(yPredTest, yTest, out testError);

            // iteration results
            curLoss.Value = trainError == OnlineCalculatorError.None ? trainR * trainR : 0.0;
            curTestLoss.Value = testError == OnlineCalculatorError.None ? testR * testR : 0.0;

            models.Add(model);
          }

          if (StoreRuns)
            runCollection.Add(run);
          // when no model was found the values of the previous iteration are logged again
          table.Rows["R² (train)"].Values.Add(curLoss.Value);
          table.Rows["R² (test)"].Values.Add(curTestLoss.Value);
          iterations.Value = i + 1;
        }

        // produce solution 
        if (CreateSolution) {
          // when all our models are symbolic models we can easily combine them to a single model
          if (models.All(m => m is ISymbolicRegressionModel)) {
            // pass the same shrinkage value that was used while accumulating the predictions above
            Results.Add(new Result("Solution", CreateSymbolicSolution(models, nu, (IRegressionProblemData)problemData.Clone())));
          }
          // just produce an ensemble solution for now (TODO: correct scaling or linear regression for ensemble model weights)

          var ensembleSolution = CreateEnsembleSolution(models, (IRegressionProblemData)problemData.Clone());
          Results.Add(new Result("EnsembleSolution", ensembleSolution));
        }
      }
      finally {
        // reset everything
        alg.Prepare(true);
      }
    }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs the boosting loop. In every iteration the configured regression algorithm is executed on a
        /// copy of the dataset whose target column holds the current residuals; when a model is produced, its
        /// predictions are scaled by the shrinkage factor and accumulated for the training and test rows.
        /// Progress is published through the "Iterations", "Qualities", "R² (train)" and "R² (test)" results.
        /// </summary>
        /// <param name="cancellationToken">Checked at the start of each iteration; a requested cancellation aborts the run by throwing.</param>
        /// <exception cref="NotSupportedException">Thrown when the sampled problem data cannot be set on the regression algorithm.</exception>
        /// <exception cref="OperationCanceledException">Thrown when cancellation was requested.</exception>
        protected override void Run(CancellationToken cancellationToken)
        {
            // Set up the algorithm
            if (SetSeedRandomly)
            {
                Seed = RandomSeedGenerator.GetSeed();
            }
            var rand = new MersenneTwister((uint)Seed);

            // Set up the results display
            var iterations = new IntValue(0);

            Results.Add(new Result("Iterations", iterations));

            var table = new DataTable("Qualities");

            table.Rows.Add(new DataRow("R² (train)"));
            table.Rows.Add(new DataRow("R² (test)"));
            Results.Add(new Result("Qualities", table));
            var curLoss     = new DoubleValue();
            var curTestLoss = new DoubleValue();

            Results.Add(new Result("R² (train)", curLoss));
            Results.Add(new Result("R² (test)", curTestLoss));
            var runCollection = new RunCollection();

            if (StoreRuns)
            {
                Results.Add(new Result("Runs", runCollection));
            }

            // init
            var problemData       = Problem.ProblemData;
            var targetVarName     = problemData.TargetVariable;
            var activeVariables   = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
            // working copy of the data; its target column is replaced by the residuals in every iteration
            var modifiableDataset = new ModifiableDataset(
                activeVariables,
                activeVariables.Select(v => problemData.Dataset.GetDoubleValues(v).ToList()));

            var trainingRows = problemData.TrainingIndices;
            var testRows     = problemData.TestIndices;
            // accumulated (shrunken) ensemble predictions, initially all zero
            var yPred        = new double[trainingRows.Count()];
            var yPredTest    = new double[testRows.Count()];
            // y / yTest: original targets; curY / curYTest: current residuals (start out equal to the targets)
            var y            = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
            var curY         = y.ToArray();

            var yTest    = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices).ToArray();
            var curYTest = yTest.ToArray();
            var nu       = Nu;
            // number of input variables / training rows handed to the base learner per iteration
            var mVars    = (int)Math.Ceiling(M * problemData.AllowedInputVariables.Count());
            var rRows    = (int)Math.Ceiling(R * problemData.TrainingIndices.Count());
            var alg      = RegressionAlgorithm;
            List <IRegressionModel> models = new List <IRegressionModel>();

            try {
                // Loop until iteration limit reached or canceled.
                for (int i = 0; i < Iterations; i++)
                {
                    cancellationToken.ThrowIfCancellationRequested();

                    // replace the target column with the current residuals (training values followed by test values)
                    modifiableDataset.RemoveVariable(targetVarName);
                    modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest).ToList());

                    // NOTE(review): presumably writes a random sample of rRows training rows into the first rows of
                    // modifiableDataset (the training partition below is [0, rRows)) - verify against SampleTrainingData
                    SampleTrainingData(rand, modifiableDataset, rRows, problemData.Dataset, curY, problemData.TargetVariable, problemData.TrainingIndices); // all training indices from the original problem data are allowed
                    var modifiableProblemData = new RegressionProblemData(modifiableDataset,
                                                                          problemData.AllowedInputVariables.SampleRandomWithoutRepetition(rand, mVars),
                                                                          problemData.TargetVariable);
                    modifiableProblemData.TrainingPartition.Start = 0;
                    modifiableProblemData.TrainingPartition.End   = rRows;
                    modifiableProblemData.TestPartition.Start     = problemData.TestPartition.Start;
                    modifiableProblemData.TestPartition.End       = problemData.TestPartition.End;

                    if (!TrySetProblemData(alg, modifiableProblemData))
                    {
                        throw new NotSupportedException("The algorithm cannot be used with GBM.");
                    }

                    IRegressionModel model;
                    IRun             run;

                    // try to find a model. The algorithm might fail to produce a model. In this case we just retry until the iterations are exhausted
                    if (TryExecute(alg, rand.Next(), RegressionAlgorithmResult, out model, out run))
                    {
                        int row = 0;
                        // update predictions for training and test
                        // update new targets (in the case of squared error loss we simply use negative residuals)
                        foreach (var pred in model.GetEstimatedValues(problemData.Dataset, trainingRows))
                        {
                            yPred[row] = yPred[row] + nu * pred;
                            curY[row]  = y[row] - yPred[row];
                            row++;
                        }
                        row = 0;
                        foreach (var pred in model.GetEstimatedValues(problemData.Dataset, testRows))
                        {
                            yPredTest[row] = yPredTest[row] + nu * pred;
                            curYTest[row]  = yTest[row] - yPredTest[row];
                            row++;
                        }
                        // determine quality
                        // each calculation gets its own status variable so that the test-set result
                        // cannot overwrite (and thereby mask or fake) the training-set result
                        OnlineCalculatorError trainError;
                        OnlineCalculatorError testError;
                        var trainR = OnlinePearsonsRCalculator.Calculate(yPred, y, out trainError);
                        var testR  = OnlinePearsonsRCalculator.Calculate(yPredTest, yTest, out testError);

                        // iteration results
                        curLoss.Value     = trainError == OnlineCalculatorError.None ? trainR * trainR : 0.0;
                        curTestLoss.Value = testError == OnlineCalculatorError.None ? testR * testR : 0.0;

                        models.Add(model);
                    }

                    if (StoreRuns)
                    {
                        runCollection.Add(run);
                    }
                    // when no model was found the values of the previous iteration are logged again
                    table.Rows["R² (train)"].Values.Add(curLoss.Value);
                    table.Rows["R² (test)"].Values.Add(curTestLoss.Value);
                    iterations.Value = i + 1;
                }

                // produce solution
                if (CreateSolution)
                {
                    // when all our models are symbolic models we can easily combine them to a single model
                    if (models.All(m => m is ISymbolicRegressionModel))
                    {
                        // pass the same shrinkage value that was used while accumulating the predictions above
                        Results.Add(new Result("Solution", CreateSymbolicSolution(models, nu, (IRegressionProblemData)problemData.Clone())));
                    }
                    // just produce an ensemble solution for now (TODO: correct scaling or linear regression for ensemble model weights)

                    var ensembleSolution = CreateEnsembleSolution(models, (IRegressionProblemData)problemData.Clone());
                    Results.Add(new Result("EnsembleSolution", ensembleSolution));
                }
            }
            finally {
                // reset everything
                alg.Prepare(true);
            }
        }