/// <summary>
/// Builds a linear regression solution that uses only the input variables referenced by
/// <c>VariableTreeNode</c>s in the symbolic expression tree of <c>Content.Model</c>.
/// Works on a clone of <c>ProblemData</c>, so the original problem data is not modified here.
/// </summary>
/// <returns>
/// The linear regression solution, named "Baseline (linear subset)", or <c>null</c> when the
/// problem data has no training indices.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>Content</c> is <c>null</c>, or (via <c>Enumerable.First</c>) when a variable
/// used by the model has no matching entry in the problem's input variables.
/// </exception>
private IRegressionSolution CreateLinearRegressionSolution() {
  if (Content == null) {
    throw new InvalidOperationException();
  }

  double rmse, cvRmsError; // required by the out parameters below; the values are not used
  var problemData = (IRegressionProblemData)ProblemData.Clone();
  if (!problemData.TrainingIndices.Any()) {
    // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model)
    return null;
  }

  // Clear all checked input variables.
  // Snapshot the checked items first: the loop body changes the checked state that
  // CheckedItems is derived from, so the live sequence must not be enumerated while it changes.
  foreach (var inputVariable in problemData.InputVariables.CheckedItems.ToList()) {
    problemData.InputVariables.SetItemCheckedState(inputVariable.Value, false);
  }

  // Check only the input variables used in the symbolic regression model.
  var usedVariables = Content.Model.SymbolicExpressionTree.IterateNodesPostfix()
    .OfType<VariableTreeNode>()
    .Select(node => node.VariableName)
    .Distinct();
  foreach (var variable in usedVariables) {
    problemData.InputVariables.SetItemCheckedState(
      problemData.InputVariables.First(x => x.Value == variable), true);
  }

  var solution = LinearRegression.CreateLinearRegressionSolution(problemData, out rmse, out cvRmsError);
  solution.Name = "Baseline (linear subset)";
  return solution;
}
/// <summary>
/// Builds a linear regression solution on a new dataset that contains only the variables used
/// by <c>Content.Model</c>: the used double variables, the used string (factor) variables, one
/// 0/1 column per binary factor node whose variable is not already used as a full factor, and
/// the target variable. Training and test partitions are copied from the problem data.
/// </summary>
/// <returns>
/// The linear regression solution, named "Baseline (linear subset)", or <c>null</c> when the
/// problem data has no training indices.
/// </returns>
/// <exception cref="InvalidOperationException">Thrown when <c>Content</c> is <c>null</c>.</exception>
private IRegressionSolution CreateLinearRegressionSolution() {
  if (Content == null) {
    throw new InvalidOperationException();
  }

  double rmse, cvRmsError; // required by the out parameters below; the values are not used
  var problemData = (IRegressionProblemData)ProblemData.Clone();
  if (!problemData.TrainingIndices.Any()) {
    // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model)
    return null;
  }

  var dataset = problemData.Dataset;
  var usedVariables = Content.Model.VariablesUsedForPrediction;

  // Materialize the name lists once; they are enumerated several times below.
  var usedDoubleVariables = usedVariables
    .Where(name => dataset.VariableHasType<double>(name))
    .Distinct()
    .ToArray();
  var usedFactorVariables = usedVariables
    .Where(name => dataset.VariableHasType<string>(name))
    .Distinct()
    .ToArray();

  // gkronber: for binary factors we actually produce a binary variable in the new dataset
  // but only if the variable is not used as a full factor anyway (LR creates binary columns anyway)
  // Item1 = variable name, Item2 = factor value. Item1 must be the variable name because it is
  // used below both for the derived column name and as the key for reading the string column.
  // Distinct() keeps a (variable, value) pair that occurs in several tree nodes from being
  // added as the same derived column more than once.
  var usedBinaryFactors = Content.Model.SymbolicExpressionTree.IterateNodesPostfix()
    .OfType<BinaryFactorVariableTreeNode>()
    .Where(node => !usedFactorVariables.Contains(node.VariableName))
    .Select(node => Tuple.Create(node.VariableName, node.VariableValue))
    .Distinct()
    .ToArray();

  // create a new problem and dataset
  // Column order: double inputs, factor inputs, derived binary columns ("name=value"), target (last).
  var variableNames = usedDoubleVariables
    .Concat(usedFactorVariables)
    .Concat(usedBinaryFactors.Select(t => t.Item1 + "=" + t.Item2))
    .Concat(new string[] { problemData.TargetVariable })
    .ToArray();

  // Values are built in the same order as variableNames.
  var variableValues = usedDoubleVariables
    .Select(name => (IList)dataset.GetDoubleValues(name).ToList())
    .Concat(usedFactorVariables.Select(name => dataset.GetStringValues(name).ToList()))
    .Concat(
      // create binary variable: 1.0 where the factor takes the given value, 0.0 otherwise
      usedBinaryFactors.Select(t =>
        dataset.GetReadOnlyStringValues(t.Item1).Select(val => val == t.Item2 ? 1.0 : 0.0).ToList()))
    .Concat(new[] { dataset.GetDoubleValues(problemData.TargetVariable).ToList() });

  var newDs = new Dataset(variableNames, variableValues);
  // All names except the last are inputs; the last name is the target.
  var newProblemData = new RegressionProblemData(
    newDs, variableNames.Take(variableNames.Length - 1), variableNames.Last());
  newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
  newProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
  newProblemData.TestPartition.Start = problemData.TestPartition.Start;
  newProblemData.TestPartition.End = problemData.TestPartition.End;

  var solution = LinearRegression.CreateLinearRegressionSolution(newProblemData, out rmse, out cvRmsError);
  solution.Name = "Baseline (linear subset)";
  return solution;
}
/// <summary>
/// Builds a <c>GaussianProcessModel</c> on the training rows of <c>ProblemData</c> using
/// <c>bestHyperParameters</c> and clones of <c>meanFunc</c> and <c>covFunc</c>, fixes its
/// parameters, and wraps it in a regression solution over a clone of <c>ProblemData</c>.
/// </summary>
/// <returns>The regression solution created from the model.</returns>
private IItem CreateSolution() {
  // Read everything needed from the problem data before taking the lock.
  var data = ProblemData;
  var dataset = data.Dataset;
  var target = data.TargetVariable;
  var inputs = data.AllowedInputVariables.ToArray();
  var rows = data.TrainingIndices.ToArray();

  // Model construction reads bestHyperParameters, meanFunc and covFunc under problemStateLocker.
  lock (problemStateLocker) {
    var gpModel = new GaussianProcessModel(
      dataset,
      target,
      inputs,
      rows,
      bestHyperParameters,
      (IMeanFunction)meanFunc.Clone(),
      (ICovarianceFunction)covFunc.Clone());
    gpModel.FixParameters();

    var solutionData = (IRegressionProblemData)ProblemData.Clone();
    return gpModel.CreateRegressionSolution(solutionData);
  }
}
// Recreates one regression solution per model in Model.Models when regressionSolutions is
// empty, then registers the regression-solutions event handler. Each recreated solution gets
// its own clone of ProblemData with the training/test partitions stored for that model.
private void AfterDeserialization() {
  var hasSolutions = regressionSolutions.Any();
  if (!hasSolutions) {
    foreach (var member in Model.Models) {
      var memberData = (IRegressionProblemData)ProblemData.Clone();

      // Restore the training range recorded for this model.
      var trainingRange = trainingPartitions[member];
      memberData.TrainingPartition.Start = trainingRange.Start;
      memberData.TrainingPartition.End = trainingRange.End;

      // Restore the test range recorded for this model.
      var testRange = testPartitions[member];
      memberData.TestPartition.Start = testRange.Start;
      memberData.TestPartition.End = testRange.End;

      var memberSolution = member.CreateRegressionSolution(memberData);
      regressionSolutions.Add(memberSolution);
    }
  }

  RegisterRegressionSolutionsEventHandler();
}