Пример #1
0
        /// <summary>
        /// Executes the t-SNE iterations. When no state exists yet, the input matrix is built from the
        /// allowed input variables, optionally normalized, and a fresh state is created first.
        /// </summary>
        /// <param name="cancellationToken">Polled before every iteration; the loop ends once cancellation is requested.</param>
        protected override void Run(CancellationToken cancellationToken)
        {
            var problemData = Problem.ProblemData;

            // a weighted Euclidean distance is initialized with the problem data on every call
            var weightedDistance = DistanceFunction as WeightedEuclideanDistance;
            if (weightedDistance != null)
            {
                weightedDistance.Initialize(problemData);
            }

            if (state == null)
            {
                if (SetSeedRandomly)
                {
                    Seed = RandomSeedGenerator.GetSeed();
                }

                var rng        = new MersenneTwister((uint)Seed);
                var dataset    = problemData.Dataset;
                var inputNames = problemData.AllowedInputVariables.ToArray();
                var indices    = Problem.ProblemData.AllIndices.ToArray();

                // jagged array (one double[] per dataset row) is required to meet the static method declarations of TSNEStatic<T>
                var matrix = Enumerable.Range(0, dataset.Rows)
                                       .Select(unused => new double[inputNames.Length])
                                       .ToArray();

                // copy the dataset column by column into the row-major matrix
                for (var column = 0; column < inputNames.Length; column++)
                {
                    var rowIndex = 0;
                    foreach (var value in dataset.GetDoubleValues(inputNames[column]))
                    {
                        matrix[rowIndex][column] = value;
                        rowIndex++;
                    }
                }

                if (Normalization)
                {
                    matrix = NormalizeInputData(matrix);
                }

                state = TSNEStatic<double[]>.CreateState(
                    matrix, DistanceFunction, rng, NewDimensions, Perplexity, Theta,
                    StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum,
                    Eta, RandomInitialization);

                SetUpResults(indices);
            }

            while (state.iter < MaxIterations && !cancellationToken.IsCancellationRequested)
            {
                // report intermediate results every UpdateInterval iterations
                if (state.iter % UpdateInterval == 0)
                {
                    Analyze(state);
                }
                TSNEStatic<double[]>.Iterate(state);
            }

            // final report, also reached after cancellation
            Analyze(state);
        }
Пример #2
0
        /// <summary>
        /// Runs the base initialization, seeds a new random number generator, builds the state scope
        /// from the current parameters and publishes it as the "StateScope" result.
        /// </summary>
        /// <param name="cancellationToken">Forwarded to the base implementation.</param>
        protected override void Initialize(CancellationToken cancellationToken)
        {
            base.Initialize(cancellationToken);

            var rng = new MersenneTwister();
            if (SetSeedRandomly)
            {
                Seed = RandomSeedGenerator.GetSeed();
            }
            rng.Reset(Seed);

            stateScope = InitializeScope(
                rng, Problem.ProblemData, Pruning, MinimalNodeSize, LeafModel,
                Splitter, GenerateRules, UseHoldout, HoldoutSize);

            // the scope carries a reference back to this algorithm under the name "Algorithm"
            var scopeVariables = stateScope.Variables;
            scopeVariables.Add(new Variable("Algorithm", this));

            Results.AddOrUpdateResult("StateScope", stateScope);
        }
Пример #3
0
        /// <summary>
        /// Seeds the random number generators, initializes results, strategy and solutions,
        /// performs a first analysis and sets the iteration counter to 1.
        /// </summary>
        /// <param name="cancellationToken">Not used by this method.</param>
        protected override void Initialize(CancellationToken cancellationToken)
        {
            // draw a fresh seed only when requested; otherwise the configured Seed is reused
            if (SetSeedRandomly)
            {
                Seed = RandomSeedGenerator.GetSeed();
            }
            random.Reset(Seed);
            // gauss draws from the freshly reset generator
            // NOTE(review): the arguments 0 and 1 are presumably mean and standard deviation - confirm against NormalDistributedRandom
            gauss = new NormalDistributedRandom(random, 0, 1);

            // NOTE(review): the call order below is kept as-is; the helpers are defined elsewhere and may depend on each other
            InitResults();
            InitStrategy();
            InitSolutions();
            Analyze();

            ResultsIterations = 1;
        }
Пример #4
0
        /// <summary>
        /// Fits the nonlinear regression model. With random parameter initialization the fit is repeated
        /// (one initial fit plus <c>Restarts</c> further fits), every fit's RMSE is logged to a table, and the
        /// solution with the lowest training RMSE is kept. Otherwise a single fit is performed.
        /// The best solution and its training/test RMSE are added to the results.
        /// </summary>
        /// <param name="cancellationToken">Not used by this method.</param>
        protected override void Run(CancellationToken cancellationToken)
        {
            IRegressionSolution best;

            if (!InitializeParametersRandomly)
            {
                // single deterministic fit
                best = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, ApplyLinearScaling);
            }
            else
            {
                // table collecting the errors of all restarts
                var rmseTable = new DataTable("RMSE table");
                rmseTable.VisualProperties.YAxisLogScale = true;

                var trainRow = new DataRow("RMSE (train)");
                trainRow.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
                var testRow = new DataRow("RMSE test");
                testRow.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;

                rmseTable.Rows.Add(trainRow);
                rmseTable.Rows.Add(testRow);
                Results.Add(new Result(rmseTable.Name, rmseTable.Name + " for all restarts", rmseTable));

                if (SetSeedRandomly)
                {
                    Seed = RandomSeedGenerator.GetSeed();
                }
                var rng = new MersenneTwister((uint)Seed);

                // initial fit; it is the incumbent until a restart beats it
                best = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, ApplyLinearScaling, rng);
                trainRow.Values.Add(best.TrainingRootMeanSquaredError);
                testRow.Values.Add(best.TestRootMeanSquaredError);

                for (int restart = 0; restart < Restarts; restart++)
                {
                    var candidate = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, ApplyLinearScaling, rng);
                    trainRow.Values.Add(candidate.TrainingRootMeanSquaredError);
                    testRow.Values.Add(candidate.TestRootMeanSquaredError);

                    // selection is based on the training error only
                    if (candidate.TrainingRootMeanSquaredError < best.TrainingRootMeanSquaredError)
                    {
                        best = candidate;
                    }
                }
            }

            var trainRmse = new DoubleValue(best.TrainingRootMeanSquaredError);
            var testRmse  = new DoubleValue(best.TestRootMeanSquaredError);

            Results.Add(new Result(RegressionSolutionResultName, "The nonlinear regression solution.", best));
            Results.Add(new Result("Root mean square error (train)", "The root of the mean of squared errors of the regression solution on the training set.", trainRmse));
            Results.Add(new Result("Root mean square error (test)", "The root of the mean of squared errors of the regression solution on the test set.", testRmse));
        }
        /// <summary>
        /// Resets the algorithm state (seed, pyramid, seen set, random generator, evaluation tracker),
        /// registers all result entries and charts, and finally calls the base initialization.
        /// </summary>
        /// <param name="cancellationToken">Forwarded to the base implementation.</param>
        protected override void Initialize(CancellationToken cancellationToken)
        {
            // --- algorithm state ---
            if (SetSeedRandomly)
            {
                Seed = RandomSeedGenerator.GetSeed();
            }
            pyramid = new List<Population>();
            seen.Clear();
            random.Reset(Seed);
            tracker = new EvaluationTracker(Problem, MaximumEvaluations);

            // --- scalar results ---
            Results.Add(new Result("Iterations", new IntValue(0)));
            Results.Add(new Result("Evaluations", new IntValue(0)));
            Results.Add(new Result("Best Solution", new BinaryVector(tracker.BestSolution)));
            Results.Add(new Result("Best Quality", new DoubleValue(tracker.BestQuality)));
            Results.Add(new Result("Evaluation Best Solution Was Found", new IntValue(tracker.BestFoundOnEvaluation)));

            // --- quality chart: best quality plus a dotted per-iteration line ---
            var qualityTable = new DataTable("Qualities");
            qualityTable.Rows.Add(new DataRow("Best Quality"));
            var iterationQualityRow = new DataRow("Iteration Quality");
            iterationQualityRow.VisualProperties.LineStyle = DataRowVisualProperties.DataRowLineStyle.Dot;
            qualityTable.Rows.Add(iterationQualityRow);
            Results.Add(new Result("Qualities", qualityTable));

            // --- pyramid level chart ---
            var levelTable = new DataTable("Pyramid Levels");
            levelTable.Rows.Add(new DataRow("Levels"));
            Results.Add(new Result("Pyramid Levels", levelTable));

            // --- stored solution chart ---
            var storedTable = new DataTable("Stored Solutions");
            storedTable.Rows.Add(new DataRow("Solutions"));
            Results.Add(new Result("Stored Solutions", storedTable));

            base.Initialize(cancellationToken);
        }
Пример #6
0
        /// <summary>
        /// Starts (or resumes) the cross-validation: on the first start one clone of the algorithm is created
        /// and prepared per fold, then the prepared/paused clones are started with at most
        /// <c>NumberOfWorkers</c> start slots, and the call blocks until <c>allAlgorithmsFinished</c> is signaled
        /// and all started tasks have completed.
        /// </summary>
        /// <param name="cancellationToken">Passed to <c>StartAsync</c> of every cloned algorithm.</param>
        /// <exception cref="InvalidOperationException">The execution state is neither Prepared nor Paused.</exception>
        public void Start(CancellationToken cancellationToken)
        {
            // only one Start may be in its set-up phase at a time; a concurrent second call returns immediately
            lock (locker) {
                if (startPending)
                {
                    return;
                }
                startPending = true;
            }

            try {
                if ((ExecutionState != ExecutionState.Prepared) && (ExecutionState != ExecutionState.Paused))
                {
                    throw new InvalidOperationException(string.Format("Start not allowed in execution state \"{0}\".", ExecutionState));
                }
                // a new seed is drawn on every start, including a resume from Paused
                seed = RandomSeedGenerator.GetSeed();

                // nothing to run without an algorithm; the finally block still clears startPending
                if (Algorithm == null)
                {
                    return;
                }
                // create the cloned algorithms (one per fold) only if none exist yet
                if (clonedAlgorithms.Count == 0)
                {
                    // integer division: the last fold absorbs the remainder (see testEnd below)
                    int      testSamplesCount = (SamplesEnd.Value - SamplesStart.Value) / Folds.Value;
                    IDataset shuffledDataset  = null;
                    for (int i = 0; i < Folds.Value; i++)
                    {
                        var cloner = new Cloner();
                        if (ShuffleSamples.Value)
                        {
                            var random = new FastRandom(seed);
                            var dataAnalysisProblem = (IDataAnalysisProblem)algorithm.Problem;
                            var dataset             = (Dataset)dataAnalysisProblem.ProblemData.Dataset;
                            // the dataset is shuffled once (first fold) and the same shuffled instance is reused for all folds
                            shuffledDataset = shuffledDataset ?? dataset.Shuffle(random);
                            // NOTE(review): presumably makes the clone reference the shuffled dataset instead of a copy of the original - confirm against Cloner
                            cloner.RegisterClonedObject(dataset, shuffledDataset);
                        }
                        IAlgorithm clonedAlgorithm = cloner.Clone(Algorithm);
                        clonedAlgorithm.Name = algorithm.Name + " Fold " + i;
                        // NOTE(review): problem comes from an 'as' cast and is dereferenced below without a null check
                        IDataAnalysisProblem         problem         = clonedAlgorithm.Problem as IDataAnalysisProblem;
                        ISymbolicDataAnalysisProblem symbolicProblem = problem as ISymbolicDataAnalysisProblem;

                        // test range of fold i; the last fold ends at SamplesEnd regardless of testSamplesCount
                        int testStart = (i * testSamplesCount) + SamplesStart.Value;
                        int testEnd   = (i + 1) == Folds.Value ? SamplesEnd.Value : (i + 1) * testSamplesCount + SamplesStart.Value;

                        // training partition spans the whole sample range, the test partition only this fold
                        // NOTE(review): the training range includes the test range here; presumably the overlap is excluded elsewhere - verify
                        problem.ProblemData.TrainingPartition.Start = SamplesStart.Value;
                        problem.ProblemData.TrainingPartition.End   = SamplesEnd.Value;
                        problem.ProblemData.TestPartition.Start     = testStart;
                        problem.ProblemData.TestPartition.End       = testEnd;
                        DataAnalysisProblemData problemData = problem.ProblemData as DataAnalysisProblemData;
                        if (problemData != null)
                        {
                            // un-hide the partition parameters on the clone
                            problemData.TrainingPartitionParameter.Hidden = false;
                            problemData.TestPartitionParameter.Hidden     = false;
                        }

                        if (symbolicProblem != null)
                        {
                            // symbolic problems additionally get their fitness calculation partition set to the whole sample range
                            symbolicProblem.FitnessCalculationPartition.Start = SamplesStart.Value;
                            symbolicProblem.FitnessCalculationPartition.End   = SamplesEnd.Value;
                        }
                        clonedAlgorithm.Prepare();
                        clonedAlgorithms.Add(clonedAlgorithm);
                    }
                }

                OnStarted();
            } finally {
                // set-up phase is over (normally, by early return, or by exception)
                if (startPending)
                {
                    startPending = false;
                }
            }

            // new synchronization objects are created for every start
            // NOTE(review): neither object is released or signaled in this method; presumably handlers elsewhere do that when a clone pauses/stops - confirm
            availableWorkers      = new SemaphoreSlim(NumberOfWorkers.Value, NumberOfWorkers.Value);
            allAlgorithmsFinished = new ManualResetEventSlim(false);

            var startedTasks = new List <Task>(clonedAlgorithms.Count);

            // start prepared or paused cloned algorithms
            foreach (IAlgorithm clonedAlgorithm in clonedAlgorithms)
            {
                // stop launching further clones once a pause/stop was requested or the state changed
                if (pausePending || stopPending || ExecutionState != ExecutionState.Started)
                {
                    break;
                }
                if (clonedAlgorithm.ExecutionState == ExecutionState.Prepared ||
                    clonedAlgorithm.ExecutionState == ExecutionState.Paused)
                {
                    // blocks until a worker slot is free; this wait does not observe cancellationToken
                    availableWorkers.Wait();
                    lock (locker) {
                        // re-check under the lock because the state may have changed while waiting for a slot
                        // NOTE(review): on this break the slot acquired above is not released in this method
                        if (pausePending || stopPending || ExecutionState != ExecutionState.Started)
                        {
                            break;
                        }
                        var task = clonedAlgorithm.StartAsync(cancellationToken);
                        startedTasks.Add(task);
                    }
                }
            }

            // block until the finished event is set (it is not set anywhere in this method)
            allAlgorithmsFinished.Wait();

            Task.WaitAll(startedTasks.ToArray()); // surfaces exceptions that were not handled within the tasks
        }
Пример #7
0
        /// <summary>
        /// Runs gradient boosting with an arbitrary base regression algorithm. In every iteration the base
        /// algorithm is trained on a random sample of rows and input variables with the current residuals as
        /// target; the resulting model's predictions, scaled by <c>Nu</c>, are added to the running predictions.
        /// Train and test R² are tracked per iteration and, if <c>CreateSolution</c> is set, a combined solution
        /// is added to the results at the end.
        /// </summary>
        /// <param name="cancellationToken">Checked at the start of every iteration; cancellation throws.</param>
        /// <exception cref="NotSupportedException">The base algorithm does not accept the generated problem data.</exception>
        /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
        protected override void Run(CancellationToken cancellationToken)
        {
            // Set up the algorithm
            if (SetSeedRandomly)
            {
                Seed = RandomSeedGenerator.GetSeed();
            }
            var rand = new MersenneTwister((uint)Seed);

            // Set up the results display
            var iterations = new IntValue(0);

            Results.Add(new Result("Iterations", iterations));

            var table = new DataTable("Qualities");

            table.Rows.Add(new DataRow("R² (train)"));
            table.Rows.Add(new DataRow("R² (test)"));
            Results.Add(new Result("Qualities", table));
            var curLoss     = new DoubleValue();
            var curTestLoss = new DoubleValue();

            Results.Add(new Result("R² (train)", curLoss));
            Results.Add(new Result("R² (test)", curTestLoss));
            var runCollection = new RunCollection();

            if (StoreRuns)
            {
                Results.Add(new Result("Runs", runCollection));
            }

            // init
            var problemData   = Problem.ProblemData;
            var targetVarName = problemData.TargetVariable;
            // materialized once because the sequence is enumerated twice below (names and values)
            var activeVariables   = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable }).ToArray();
            var modifiableDataset = new ModifiableDataset(
                activeVariables,
                activeVariables.Select(v => problemData.Dataset.GetDoubleValues(v).ToList()));

            var trainingRows = problemData.TrainingIndices;
            var testRows     = problemData.TestIndices;
            // accumulated (boosted) predictions, starting at zero
            var yPred        = new double[trainingRows.Count()];
            var yPredTest    = new double[testRows.Count()];
            // y: original targets; curY: current residual targets (initially equal to y)
            var y            = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
            var curY         = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();

            var yTest    = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices).ToArray();
            var curYTest = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices).ToArray();
            var nu       = Nu;
            // number of input variables / training rows sampled per iteration (fractions M and R, rounded up)
            var mVars    = (int)Math.Ceiling(M * problemData.AllowedInputVariables.Count());
            var rRows    = (int)Math.Ceiling(R * problemData.TrainingIndices.Count());
            var alg      = RegressionAlgorithm;
            List<IRegressionModel> models = new List<IRegressionModel>();

            try {
                // Loop until iteration limit reached or canceled.
                for (int i = 0; i < Iterations; i++)
                {
                    cancellationToken.ThrowIfCancellationRequested();

                    // replace the target column with the current residuals
                    modifiableDataset.RemoveVariable(targetVarName);
                    modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest).ToList());

                    SampleTrainingData(rand, modifiableDataset, rRows, problemData.Dataset, curY, problemData.TargetVariable, problemData.TrainingIndices); // all training indices from the original problem data are allowed
                    var modifiableProblemData = new RegressionProblemData(modifiableDataset,
                                                                          problemData.AllowedInputVariables.SampleRandomWithoutRepetition(rand, mVars),
                                                                          problemData.TargetVariable);
                    modifiableProblemData.TrainingPartition.Start = 0;
                    modifiableProblemData.TrainingPartition.End   = rRows;
                    modifiableProblemData.TestPartition.Start     = problemData.TestPartition.Start;
                    modifiableProblemData.TestPartition.End       = problemData.TestPartition.End;

                    if (!TrySetProblemData(alg, modifiableProblemData))
                    {
                        throw new NotSupportedException("The algorithm cannot be used with GBM.");
                    }

                    IRegressionModel model;
                    IRun             run;

                    // try to find a model. The algorithm might fail to produce a model. In this case we just retry until the iterations are exhausted
                    if (TryExecute(alg, rand.Next(), RegressionAlgorithmResult, out model, out run))
                    {
                        int row = 0;
                        // update predictions for training and test
                        // update new targets (in the case of squared error loss we simply use negative residuals)
                        foreach (var pred in model.GetEstimatedValues(problemData.Dataset, trainingRows))
                        {
                            yPred[row] = yPred[row] + nu * pred;
                            curY[row]  = y[row] - yPred[row];
                            row++;
                        }
                        row = 0;
                        foreach (var pred in model.GetEstimatedValues(problemData.Dataset, testRows))
                        {
                            yPredTest[row] = yPredTest[row] + nu * pred;
                            curYTest[row]  = yTest[row] - yPredTest[row];
                            row++;
                        }

                        // determine quality; each correlation keeps its own error flag so that the outcome of
                        // the test calculation cannot overwrite (and thereby hide or fake) a training error
                        OnlineCalculatorError trainError;
                        OnlineCalculatorError testError;
                        var trainR = OnlinePearsonsRCalculator.Calculate(yPred, y, out trainError);
                        var testR  = OnlinePearsonsRCalculator.Calculate(yPredTest, yTest, out testError);

                        // iteration results (R² falls back to 0 when the respective calculation failed)
                        curLoss.Value     = trainError == OnlineCalculatorError.None ? trainR * trainR : 0.0;
                        curTestLoss.Value = testError == OnlineCalculatorError.None ? testR * testR : 0.0;

                        models.Add(model);
                    }

                    // NOTE(review): the run is stored even when TryExecute returned false - confirm it is valid in that case
                    if (StoreRuns)
                    {
                        runCollection.Add(run);
                    }
                    // when no model was produced the previous loss values are logged again
                    table.Rows["R² (train)"].Values.Add(curLoss.Value);
                    table.Rows["R² (test)"].Values.Add(curTestLoss.Value);
                    iterations.Value = i + 1;
                }

                // produce solution
                if (CreateSolution)
                {
                    // when all our models are symbolic models we can easily combine them to a single model
                    // NOTE(review): All(...) is also true for an empty model list - confirm CreateSymbolicSolution handles that
                    if (models.All(m => m is ISymbolicRegressionModel))
                    {
                        Results.Add(new Result("Solution", CreateSymbolicSolution(models, Nu, (IRegressionProblemData)problemData.Clone())));
                    }
                    // just produce an ensemble solution for now (TODO: correct scaling or linear regression for ensemble model weights)

                    var ensembleSolution = CreateEnsembleSolution(models, (IRegressionProblemData)problemData.Clone());
                    Results.Add(new Result("EnsembleSolution", ensembleSolution));
                }
            }
            finally {
                // reset everything
                alg.Prepare(true);
            }
        }