/// <summary>
/// Calculates an impact value for every allowed input variable of a regression solution.
/// The impact of a variable is the solution's R² on the selected partition minus the squared
/// Pearson correlation between the targets and the estimates returned by
/// EvaluateModelWithReplacedVariable for that variable.
/// </summary>
/// <param name="solution">Solution providing the model, the problem data and the baseline R² values.</param>
/// <param name="data">Partition (all, training or test rows) on which impacts are computed.</param>
/// <param name="replacement">Replacement method forwarded to EvaluateModelWithReplacedVariable.</param>
/// <returns>Lazily evaluated (variable name, impact) tuples ordered by descending impact.</returns>
/// <exception cref="ArgumentException">The partition value is not All, Training or Test.</exception>
/// <exception cref="InvalidOperationException">The correlation calculator reported an error.</exception>
public static IEnumerable<Tuple<string, double>> CalculateImpacts(IRegressionSolution solution, DataPartitionEnum data = DataPartitionEnum.Training, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
  var problemData = solution.ProblemData;
  var dataset = problemData.Dataset;

  IEnumerable<int> partitionRows;
  IEnumerable<double> targets;
  double baselineR2;

  if (data == DataPartitionEnum.All) {
    // No precomputed R² is read for the full dataset, so it is derived from Pearson's R here.
    OnlineCalculatorError baselineError;
    partitionRows = solution.ProblemData.AllIndices;
    targets = problemData.TargetVariableValues.ToList();
    double baselineR = OnlinePearsonsRCalculator.Calculate(problemData.TargetVariableValues, solution.EstimatedValues, out baselineError);
    if (baselineError != OnlineCalculatorError.None) {
      throw new InvalidOperationException("Error during R² calculation.");
    }
    baselineR2 = baselineR * baselineR;
  } else if (data == DataPartitionEnum.Training) {
    partitionRows = problemData.TrainingIndices;
    targets = problemData.TargetVariableTrainingValues.ToList();
    baselineR2 = solution.TrainingRSquared;
  } else if (data == DataPartitionEnum.Test) {
    partitionRows = problemData.TestIndices;
    targets = problemData.TargetVariableTestValues.ToList();
    baselineR2 = solution.TestRSquared;
  } else {
    throw new ArgumentException(string.Format("DataPartition {0} cannot be handled.", data));
  }

  var impactByVariable = new Dictionary<string, double>();
  // The cast requires the problem data's dataset to be a concrete Dataset instance.
  var modifiableDataset = ((Dataset)dataset).ToModifiable();

  foreach (var variableName in problemData.AllowedInputVariables) {
    OnlineCalculatorError replacedError;
    var replacedEstimates = EvaluateModelWithReplacedVariable(solution.Model, variableName, modifiableDataset, partitionRows, replacement);
    var replacedR = OnlinePearsonsRCalculator.Calculate(targets, replacedEstimates, out replacedError);
    if (replacedError != OnlineCalculatorError.None) {
      throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
    }
    impactByVariable[variableName] = baselineR2 - replacedR * replacedR;
  }

  var ranked = impactByVariable.OrderByDescending(entry => entry.Value);
  return ranked.Select(entry => Tuple.Create(entry.Key, entry.Value));
}
/// <summary>
/// Calculates variable impacts for a regression solution by selecting the rows of the requested
/// partition, fetching the solution's estimated values for those rows, and delegating to the
/// CalculateImpacts overload that takes the model, problem data, estimates and rows.
/// </summary>
/// <param name="solution">Regression solution whose model and problem data are analysed.</param>
/// <param name="replacementMethod">Replacement method forwarded to the delegated overload.</param>
/// <param name="factorReplacementMethod">Factor replacement method forwarded to the delegated overload.</param>
/// <param name="dataPartition">Partition used to select the rows (see GetPartitionRows).</param>
/// <returns>The (variable name, impact) tuples produced by the delegated overload.</returns>
public static IEnumerable<Tuple<string, double>> CalculateImpacts(
  IRegressionSolution solution,
  ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
  FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
  DataPartitionEnum dataPartition = DataPartitionEnum.Training) {
  var problemData = solution.ProblemData;
  var partitionRows = GetPartitionRows(dataPartition, problemData);

  // Explicitly typed so the argument's static type matches what the delegated overload received before.
  IEnumerable<double> partitionEstimates = solution.GetEstimatedValues(partitionRows);

  return CalculateImpacts(
    solution.Model,
    problemData,
    partitionEstimates,
    partitionRows,
    replacementMethod,
    factorReplacementMethod);
}
/// <summary>
/// Calculates variable impacts for a classification solution by selecting the rows of the
/// requested partition, fetching the solution's estimated class values for those rows, and
/// delegating to the CalculateImpacts overload that takes a model, problem data, estimates and rows.
/// The delegated overload receives a clone of the solution's model, not the model itself.
/// </summary>
/// <param name="solution">Classification solution whose model and problem data are analysed.</param>
/// <param name="replacementMethod">Replacement method forwarded to the delegated overload.</param>
/// <param name="factorReplacementMethod">Factor replacement method forwarded to the delegated overload.</param>
/// <param name="dataPartition">Partition used to select the rows (see GetPartitionRows).</param>
/// <returns>The (variable name, impact) tuples produced by the delegated overload.</returns>
public static IEnumerable<Tuple<string, double>> CalculateImpacts(
  IClassificationSolution solution,
  ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
  FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
  DataPartitionEnum dataPartition = DataPartitionEnum.Training) {
  var problemData = solution.ProblemData;
  var partitionRows = GetPartitionRows(dataPartition, problemData);

  // Explicitly typed so the argument's static type matches what the delegated overload received before.
  IEnumerable<double> partitionClassValues = solution.GetEstimatedClassValues(partitionRows);

  // mkommend: the model must be cloned because the thresholds of
  // IDiscriminantClassificationModels are updated during the impact calculation.
  var clonedModel = (IClassificationModel)solution.Model.Clone();

  return CalculateImpacts(
    clonedModel,
    problemData,
    partitionClassValues,
    partitionRows,
    replacementMethod,
    factorReplacementMethod);
}
/// <summary>
/// Maps a data partition to the corresponding row indices of the given problem data.
/// </summary>
/// <param name="dataPartition">Partition to resolve (All, Test or Training).</param>
/// <param name="problemData">Problem data supplying the index sequences.</param>
/// <returns>
/// <c>AllIndices</c>, <c>TestIndices</c> or <c>TrainingIndices</c> of <paramref name="problemData"/>,
/// depending on <paramref name="dataPartition"/>.
/// </returns>
/// <exception cref="NotSupportedException">The partition is none of the three handled values.</exception>
public static IEnumerable<int> GetPartitionRows(DataPartitionEnum dataPartition, IRegressionProblemData problemData) {
  switch (dataPartition) {
    case DataPartitionEnum.Training:
      return problemData.TrainingIndices;
    case DataPartitionEnum.Test:
      return problemData.TestIndices;
    case DataPartitionEnum.All:
      return problemData.AllIndices;
    default:
      throw new NotSupportedException("DataPartition not supported");
  }
}
/// <summary>
/// Calculates an impact value for input variables of a regression solution, handling
/// double-typed and string-typed (factor) variables separately.
/// The impact of a variable is the solution's R² on the selected partition minus the squared
/// Pearson correlation between the targets and the estimates returned by
/// EvaluateModelWithReplacedVariable for that variable.
/// </summary>
/// <param name="solution">Solution providing the model, the problem data and the baseline R² values.</param>
/// <param name="data">Partition (all, training or test rows) on which impacts are computed.</param>
/// <param name="replacementMethod">Replacement method used for double variables.</param>
/// <param name="factorReplacementMethod">
/// Replacement method used for string variables. With <c>Best</c>, every distinct value of the
/// variable within the partition rows is tried and the smallest resulting impact is reported.
/// </param>
/// <returns>Lazily evaluated (variable name, impact) tuples ordered by descending impact.</returns>
/// <exception cref="ArgumentException">The partition value is not All, Training or Test.</exception>
/// <exception cref="InvalidOperationException">The correlation calculator reported an error.</exception>
public static IEnumerable<Tuple<string, double>> CalculateImpacts(
  IRegressionSolution solution,
  DataPartitionEnum data = DataPartitionEnum.Training,
  ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
  FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) {
  var problemData = solution.ProblemData;
  var dataset = problemData.Dataset;

  IEnumerable<int> partitionRows;
  IEnumerable<double> targets;
  double baselineR2;
  OnlineCalculatorError calcError;

  if (data == DataPartitionEnum.All) {
    // No precomputed R² is read for the full dataset, so it is derived from Pearson's R here.
    partitionRows = solution.ProblemData.AllIndices;
    targets = problemData.TargetVariableValues.ToList();
    double baselineR = OnlinePearsonsRCalculator.Calculate(problemData.TargetVariableValues, solution.EstimatedValues, out calcError);
    if (calcError != OnlineCalculatorError.None) {
      throw new InvalidOperationException("Error during R² calculation.");
    }
    baselineR2 = baselineR * baselineR;
  } else if (data == DataPartitionEnum.Training) {
    partitionRows = problemData.TrainingIndices;
    targets = problemData.TargetVariableTrainingValues.ToList();
    baselineR2 = solution.TrainingRSquared;
  } else if (data == DataPartitionEnum.Test) {
    partitionRows = problemData.TestIndices;
    targets = problemData.TargetVariableTestValues.ToList();
    baselineR2 = solution.TestRSquared;
  } else {
    throw new ArgumentException(string.Format("DataPartition {0} cannot be handled.", data));
  }

  var impactByVariable = new Dictionary<string, double>();
  // The cast requires the problem data's dataset to be a concrete Dataset instance.
  var modifiableDataset = ((Dataset)dataset).ToModifiable();

  // Candidates are the allowed inputs plus anything the model uses for prediction,
  // visited in the order in which the dataset lists its variables.
  var candidateNames = new HashSet<string>(problemData.AllowedInputVariables.Union(solution.Model.VariablesUsedForPrediction));
  var orderedCandidates = dataset.VariableNames.Where(name => candidateNames.Contains(name)).ToList();

  // Double-typed variables: a single evaluation with the numeric replacement method.
  foreach (var variableName in orderedCandidates.Where(problemData.Dataset.VariableHasType<double>)) {
    var replacedEstimates = EvaluateModelWithReplacedVariable(solution.Model, variableName, modifiableDataset, partitionRows, replacementMethod);
    var replacedR = OnlinePearsonsRCalculator.Calculate(targets, replacedEstimates, out calcError);
    if (calcError != OnlineCalculatorError.None) {
      throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
    }
    impactByVariable[variableName] = baselineR2 - replacedR * replacedR;
  }

  // String-typed (factor) variables.
  foreach (var variableName in orderedCandidates.Where(problemData.Dataset.VariableHasType<string>)) {
    if (factorReplacementMethod != FactorReplacementMethodEnum.Best) {
      // Any other factor replacement method: a single evaluation, as for doubles.
      var replacedEstimates = EvaluateModelWithReplacedVariable(solution.Model, variableName, modifiableDataset, partitionRows, factorReplacementMethod);
      var replacedR = OnlinePearsonsRCalculator.Calculate(targets, replacedEstimates, out calcError);
      if (calcError != OnlineCalculatorError.None) {
        throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
      }
      impactByVariable[variableName] = baselineR2 - replacedR * replacedR;
      continue;
    }

    // Best: substitute each distinct value for the whole column and keep the smallest impact.
    var lowestImpact = double.PositiveInfinity;
    foreach (var candidateValue in problemData.Dataset.GetStringValues(variableName, partitionRows).Distinct()) {
      var constantColumn = Enumerable.Repeat(candidateValue, dataset.Rows);
      var replacedEstimates = EvaluateModelWithReplacedVariable(solution.Model, variableName, modifiableDataset, partitionRows, constantColumn);
      var replacedR = OnlinePearsonsRCalculator.Calculate(targets, replacedEstimates, out calcError);
      if (calcError != OnlineCalculatorError.None) {
        throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
      }
      var candidateImpact = baselineR2 - replacedR * replacedR;
      // Strict comparison on purpose: a NaN candidate never replaces the current minimum.
      if (lowestImpact > candidateImpact) {
        lowestImpact = candidateImpact;
      }
    }
    impactByVariable[variableName] = lowestImpact;
  }

  var ranked = impactByVariable.OrderByDescending(entry => entry.Value);
  return ranked.Select(entry => Tuple.Create(entry.Key, entry.Value));
}
/// <summary>
/// Calculates an impact value for every allowed input variable of a regression solution.
/// For each variable the model is re-evaluated through EvaluateModelWithReplacedVariable, and the
/// impact is the solution's R² on the selected partition minus the squared Pearson correlation
/// between the targets and those new estimates.
/// </summary>
/// <param name="solution">Solution providing the model, the problem data and the baseline R² values.</param>
/// <param name="data">Partition (all, training or test rows) on which impacts are computed.</param>
/// <param name="replacement">Replacement method forwarded to EvaluateModelWithReplacedVariable.</param>
/// <returns>Lazily evaluated (variable name, impact) tuples ordered by descending impact.</returns>
/// <exception cref="ArgumentException">The partition value is not All, Training or Test.</exception>
/// <exception cref="InvalidOperationException">The correlation calculator reported an error.</exception>
public static IEnumerable<Tuple<string, double>> CalculateImpacts(IRegressionSolution solution, DataPartitionEnum data = DataPartitionEnum.Training, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
  var problemData = solution.ProblemData;
  var dataset = problemData.Dataset;

  IEnumerable<int> selectedRows;
  IEnumerable<double> observed;
  double referenceR2;
  OnlineCalculatorError status;

  switch (data) {
    case DataPartitionEnum.Training:
      selectedRows = problemData.TrainingIndices;
      observed = problemData.TargetVariableTrainingValues.ToList();
      referenceR2 = solution.TrainingRSquared;
      break;
    case DataPartitionEnum.Test:
      selectedRows = problemData.TestIndices;
      observed = problemData.TargetVariableTestValues.ToList();
      referenceR2 = solution.TestRSquared;
      break;
    case DataPartitionEnum.All:
      // The full-dataset R² is derived here from Pearson's R over all rows.
      selectedRows = solution.ProblemData.AllIndices;
      observed = problemData.TargetVariableValues.ToList();
      double referenceR = OnlinePearsonsRCalculator.Calculate(problemData.TargetVariableValues, solution.EstimatedValues, out status);
      if (status != OnlineCalculatorError.None) {
        throw new InvalidOperationException("Error during R² calculation.");
      }
      referenceR2 = referenceR * referenceR;
      break;
    default:
      throw new ArgumentException(string.Format("DataPartition {0} cannot be handled.", data));
  }

  var impactTable = new Dictionary<string, double>();
  // The cast requires the problem data's dataset to be a concrete Dataset instance.
  var workingDataset = ((Dataset)dataset).ToModifiable();

  foreach (var variable in problemData.AllowedInputVariables) {
    var estimatesAfterReplacement = EvaluateModelWithReplacedVariable(solution.Model, variable, workingDataset, selectedRows, replacement);
    var r = OnlinePearsonsRCalculator.Calculate(observed, estimatesAfterReplacement, out status);
    if (status != OnlineCalculatorError.None) {
      throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
    }
    impactTable[variable] = referenceR2 - r * r;
  }

  return from entry in impactTable
         orderby entry.Value descending
         select Tuple.Create(entry.Key, entry.Value);
}