/// <summary>
/// Builds the baseline classification solutions for the given problem data:
/// a ZeroR solution, a OneR solution and, if it can be created, a linear
/// discriminant analysis solution.
/// </summary>
/// <param name="problemData">Problem data handed to each baseline algorithm.</param>
/// <returns>The created baseline solutions, in the order ZeroR, OneR, LDA.</returns>
private IEnumerable<IClassificationSolution> GenerateClassificationSolutions(IClassificationProblemData problemData) {
    var baselines = new List<IClassificationSolution>();

    var zeroRSolution = ZeroR.CreateZeroRSolution(problemData);
    zeroRSolution.Name = "ZeroR Classification Solution";
    baselines.Add(zeroRSolution);

    var oneRSolution = OneR.CreateOneRSolution(problemData);
    oneRSolution.Name = "OneR Classification Solution";
    baselines.Add(oneRSolution);

    // LDA is best-effort: when its creation throws one of the two exception
    // types below, the solution is simply left out of the result.
    try {
        var ldaSolution = LinearDiscriminantAnalysis.CreateLinearDiscriminantAnalysisSolution(problemData);
        ldaSolution.Name = "Linear Discriminant Analysis Solution";
        baselines.Add(ldaSolution);
    } catch (NotSupportedException) {
        // intentionally ignored
    } catch (ArgumentException) {
        // intentionally ignored
    }

    return baselines;
}
/// <summary>
/// Extends the comparison solutions of the base class with OneR and linear
/// discriminant analysis solutions that are trained on a reduced dataset
/// containing only the variables referenced by the symbolic model in
/// <c>Content</c> (plus the target variable).
/// </summary>
/// <returns>
/// The base solutions alone when the model tree contains lagged variable nodes;
/// <c>null</c> when the problem data has no training indices; otherwise the base
/// solutions followed by every "(subset)" solution that could be created.
/// </returns>
protected override IEnumerable<IClassificationSolution> GenerateClassificationSolutions() {
    var solutionsBase = base.GenerateClassificationSolutions();
    var solutions = new List<IClassificationSolution>();

    var symbolicSolution = Content;
    var tree = symbolicSolution.Model.SymbolicExpressionTree;

    // does not support lagged variables
    if (tree.IterateNodesPrefix().OfType<LaggedVariableTreeNode>().Any()) {
        return solutionsBase;
    }

    var problemData = (IClassificationProblemData)symbolicSolution.ProblemData.Clone();
    if (!problemData.TrainingIndices.Any()) {
        // don't create any comparison models if the problem does not have a training set
        // (e.g. loaded into an existing model)
        return null;
    }

    var usedVariables = tree.IterateNodesPostfix()
        .OfType<IVariableTreeNode>()
        .Select(node => node.VariableName)
        .ToArray();

    // Materialized once: both sequences are enumerated several times below.
    var usedDoubleVariables = usedVariables
        .Where(name => problemData.Dataset.VariableHasType<double>(name))
        .Distinct()
        .ToArray();

    var usedFactorVariables = usedVariables
        .Where(name => problemData.Dataset.VariableHasType<string>(name))
        .Distinct()
        .ToArray();

    // gkronber: for binary factors we actually produce a binary variable in the new dataset
    // but only if the variable is not used as a full factor anyway (LR creates binary columns anyway)
    //
    // Each tuple is (variable name, variable value): the name is required to look up the
    // string column in the dataset and the value is what the column is compared against.
    // The same name/value pair may occur in several tree nodes, so the pairs are made
    // distinct to emit exactly one "name=value" column per pair.
    var usedBinaryFactors = tree.IterateNodesPostfix()
        .OfType<BinaryFactorVariableTreeNode>()
        .Where(node => !usedFactorVariables.Contains(node.VariableName))
        .Select(node => Tuple.Create(node.VariableName, node.VariableValue))
        .Distinct()
        .ToArray();

    // create a new problem and dataset
    var variableNames = usedDoubleVariables
        .Concat(usedFactorVariables)
        .Concat(usedBinaryFactors.Select(t => t.Item1 + "=" + t.Item2))
        .Concat(new string[] { problemData.TargetVariable })
        .ToArray();

    // Column order must match variableNames: doubles, factors, binary factors, target.
    var variableValues = usedDoubleVariables
        .Select(name => (IList)problemData.Dataset.GetDoubleValues(name).ToList())
        .Concat(usedFactorVariables.Select(name => problemData.Dataset.GetStringValues(name).ToList()))
        .Concat(
            // create binary variable
            usedBinaryFactors.Select(t => problemData.Dataset.GetReadOnlyStringValues(t.Item1)
                .Select(val => val == t.Item2 ? 1.0 : 0.0)
                .ToList()))
        .Concat(new[] { problemData.Dataset.GetDoubleValues(problemData.TargetVariable).ToList() });

    var newDs = new Dataset(variableNames, variableValues);

    // All columns except the last one are inputs; the last one is the target.
    var newProblemData = new ClassificationProblemData(
        newDs,
        variableNames.Take(variableNames.Length - 1),
        variableNames.Last());
    newProblemData.PositiveClass = problemData.PositiveClass;
    newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
    newProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
    newProblemData.TestPartition.Start = problemData.TestPartition.Start;
    newProblemData.TestPartition.End = problemData.TestPartition.End;

    // Both subset models are best-effort: a model whose creation throws one of the
    // caught exception types is left out of the result.
    try {
        var oneR = OneR.CreateOneRSolution(newProblemData);
        oneR.Name = "OneR Classification Solution (subset)";
        solutions.Add(oneR);
    } catch (NotSupportedException) {
        // intentionally ignored
    } catch (ArgumentException) {
        // intentionally ignored
    }

    try {
        var lda = LinearDiscriminantAnalysis.CreateLinearDiscriminantAnalysisSolution(newProblemData);
        lda.Name = "Linear Discriminant Analysis Solution (subset)";
        solutions.Add(lda);
    } catch (NotSupportedException) {
        // intentionally ignored
    } catch (ArgumentException) {
        // intentionally ignored
    }

    return solutionsBase.Concat(solutions);
}