Esempio n. 1
0
        /// <summary>
        /// Creates a linear regression baseline that is restricted to the input variables
        /// referenced by variable nodes of the current symbolic regression model.
        /// </summary>
        /// <returns>
        /// The linear regression solution named "Baseline (linear subset)", or <c>null</c> when
        /// the cloned problem data has no training indices.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>Content</c> is <c>null</c>. <c>First</c> also throws this exception type
        /// when a variable used by the model is not found among the input variables.
        /// </exception>
        private IRegressionSolution CreateLinearRegressionSolution()
        {
            if (Content == null)
            {
                throw new InvalidOperationException();
            }
            double rmse, cvRmsError;

            // work on a clone so that the checked state of the original problem data is not touched
            var problemData = (IRegressionProblemData)ProblemData.Clone();

            if (!problemData.TrainingIndices.Any())
            {
                return(null);                              // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model)
            }

            //clear checked inputVariables
            // Snapshot the checked items first: the loop body changes the checked state, so
            // enumerating CheckedItems directly would mutate the sequence that is being iterated.
            var checkedInputVariables = problemData.InputVariables.CheckedItems.ToList();
            foreach (var inputVariable in checkedInputVariables)
            {
                problemData.InputVariables.SetItemCheckedState(inputVariable.Value, false);
            }

            //check inputVariables used in the symbolic regression model
            var usedVariables =
                Content.Model.SymbolicExpressionTree.IterateNodesPostfix().OfType<VariableTreeNode>()
                .Select(node => node.VariableName)
                .Distinct();

            foreach (var variable in usedVariables)
            {
                problemData.InputVariables.SetItemCheckedState(
                    problemData.InputVariables.First(x => x.Value == variable), true);
            }

            // rmse and cvRmsError are required out arguments; only the solution is used here
            var solution = LinearRegression.CreateLinearRegressionSolution(problemData, out rmse, out cvRmsError);

            solution.Name = "Baseline (linear subset)";
            return(solution);
        }
        /// <summary>
        /// Creates a linear regression baseline on a newly assembled dataset that contains only
        /// the variables used by the current model (double variables, factor variables and
        /// binary indicator columns for binary factor nodes) plus the target variable.
        /// </summary>
        /// <returns>
        /// The linear regression solution named "Baseline (linear subset)", or <c>null</c> when
        /// the cloned problem data has no training indices.
        /// </returns>
        /// <exception cref="InvalidOperationException">Thrown when <c>Content</c> is <c>null</c>.</exception>
        private IRegressionSolution CreateLinearRegressionSolution()
        {
            if (Content == null)
            {
                throw new InvalidOperationException();
            }
            double rmse, cvRmsError;
            var    problemData = (IRegressionProblemData)ProblemData.Clone();

            if (!problemData.TrainingIndices.Any())
            {
                return(null);                              // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model)
            }

            // The sequences below are enumerated several times (names, values, membership checks),
            // so each query is materialized exactly once.
            var usedVariables = Content.Model.VariablesUsedForPrediction.ToArray();

            var usedDoubleVariables = usedVariables
                                      .Where(name => problemData.Dataset.VariableHasType<double>(name))
                                      .Distinct()
                                      .ToArray();

            var usedFactorVariables = usedVariables
                                      .Where(name => problemData.Dataset.VariableHasType<string>(name))
                                      .Distinct()
                                      .ToArray();

            // gkronber: for binary factors we actually produce a binary variable in the new dataset
            // but only if the variable is not used as a full factor anyway (LR creates binary columns anyway)
            // Item1 = factor variable name, Item2 = factor value. Item1 is later used to read the
            // column from the dataset, so it has to be the variable NAME, not the value.
            // Distinct() prevents duplicate column names when several nodes reference the same
            // (variable, value) pair.
            var usedBinaryFactors =
                Content.Model.SymbolicExpressionTree.IterateNodesPostfix().OfType<BinaryFactorVariableTreeNode>()
                .Where(node => !usedFactorVariables.Contains(node.VariableName))
                .Select(node => Tuple.Create(node.VariableName, node.VariableValue))
                .Distinct()
                .ToArray();

            // create a new problem and dataset
            // column order: double variables, factor variables, binary indicators ("name=value"), target
            var variableNames =
                usedDoubleVariables
                .Concat(usedFactorVariables)
                .Concat(usedBinaryFactors.Select(t => t.Item1 + "=" + t.Item2))
                .Concat(new string[] { problemData.TargetVariable })
                .ToArray();

            // values are produced in exactly the same order as variableNames
            var variableValues =
                usedDoubleVariables.Select(name => (IList)problemData.Dataset.GetDoubleValues(name).ToList())
                .Concat(usedFactorVariables.Select(name => problemData.Dataset.GetStringValues(name).ToList()))
                .Concat(
                    // create binary variable: 1.0 where the factor has the given value, 0.0 otherwise
                    usedBinaryFactors.Select(t => problemData.Dataset.GetReadOnlyStringValues(t.Item1).Select(val => val == t.Item2 ? 1.0 : 0.0).ToList())
                    )
                .Concat(new[] { problemData.Dataset.GetDoubleValues(problemData.TargetVariable).ToList() });

            var newDs = new Dataset(variableNames, variableValues);

            // all columns except the last one are inputs; the last column is the target
            var newProblemData = new RegressionProblemData(newDs, variableNames.Take(variableNames.Length - 1), variableNames.Last());

            // reuse the partitions of the original problem data
            newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
            newProblemData.TrainingPartition.End   = problemData.TrainingPartition.End;
            newProblemData.TestPartition.Start     = problemData.TestPartition.Start;
            newProblemData.TestPartition.End       = problemData.TestPartition.End;

            // rmse and cvRmsError are required out arguments; only the solution is used here
            var solution = LinearRegression.CreateLinearRegressionSolution(newProblemData, out rmse, out cvRmsError);

            solution.Name = "Baseline (linear subset)";
            return(solution);
        }
        /// <summary>
        /// Builds a Gaussian process model from the current problem data and the values held in
        /// <c>bestHyperParameters</c>, and wraps it in a regression solution.
        /// </summary>
        /// <returns>The regression solution created by the model for a clone of <c>ProblemData</c>.</returns>
        private IItem CreateSolution()
        {
            // gather the model inputs before entering the lock
            var data      = ProblemData;
            var dataset   = data.Dataset;
            var target    = data.TargetVariable;
            var inputs    = data.AllowedInputVariables.ToArray();
            var trainRows = data.TrainingIndices.ToArray();

            lock (problemStateLocker)
            {
                // the model receives clones of the mean and covariance functions, not the fields themselves
                var hyperParameters = bestHyperParameters;
                var meanClone       = (IMeanFunction)meanFunc.Clone();
                var covClone        = (ICovarianceFunction)covFunc.Clone();

                var gpModel = new GaussianProcessModel(dataset, target, inputs, trainRows, hyperParameters, meanClone, covClone);
                gpModel.FixParameters();

                // the solution is created on its own clone of the problem data
                var solutionData = (IRegressionProblemData)ProblemData.Clone();
                return gpModel.CreateRegressionSolution(solutionData);
            }
        }
        /// <summary>
        /// Recreates one regression solution per model when <c>regressionSolutions</c> is empty,
        /// then registers the regression solution event handlers.
        /// </summary>
        private void AfterDeserialization()
        {
            bool solutionsMissing = !regressionSolutions.Any();
            if (solutionsMissing)
            {
                foreach (var memberModel in Model.Models)
                {
                    // every model gets its own clone of the problem data ...
                    var memberData = (IRegressionProblemData)ProblemData.Clone();

                    // ... with the training and test partitions stored for that model
                    var trainingRange = trainingPartitions[memberModel];
                    memberData.TrainingPartition.Start = trainingRange.Start;
                    memberData.TrainingPartition.End   = trainingRange.End;

                    var testRange = testPartitions[memberModel];
                    memberData.TestPartition.Start = testRange.Start;
                    memberData.TestPartition.End   = testRange.End;

                    var memberSolution = memberModel.CreateRegressionSolution(memberData);
                    regressionSolutions.Add(memberSolution);
                }
            }

            // handlers are registered regardless of whether solutions were rebuilt
            RegisterRegressionSolutionsEventHandler();
        }