/// <summary>
/// Temporarily replaces the values of <paramref name="variableName"/> in
/// <paramref name="modifiableDataset"/> with <paramref name="replacementValues"/>,
/// evaluates the model on the modified data and returns the resulting quality.
/// The original values are written back before the method returns.
/// </summary>
/// <param name="model">Model whose estimates are evaluated.</param>
/// <param name="modifiableDataset">Dataset that is mutated for the duration of the call.</param>
/// <param name="variableName">Name of the variable whose values are replaced.</param>
/// <param name="originalValues">Values written back to the dataset once the evaluation is done.</param>
/// <param name="rows">Rows for which class values are estimated.</param>
/// <param name="replacementValues">Values the variable holds while the model is evaluated.</param>
/// <param name="targetValues">Target values the estimates are compared against.</param>
/// <returns>The value returned by <c>CalculateQuality</c> for the modified dataset.</returns>
private static double CalculateQualityForReplacement(
    IClassificationModel model,
    ModifiableDataset modifiableDataset,
    string variableName,
    IList originalValues,
    IEnumerable<int> rows,
    IList replacementValues,
    IEnumerable<double> targetValues) {
    modifiableDataset.ReplaceVariable(variableName, replacementValues);
    try {
        // Discriminant-function models get their parameters recalculated on the modified data.
        // NOTE(review): the recalculated parameters are not reverted afterwards - confirm this is intended.
        var discModel = model as IDiscriminantFunctionClassificationModel;
        if (discModel != null) {
            var problemData = new ClassificationProblemData(modifiableDataset, modifiableDataset.VariableNames, model.TargetVariable);
            discModel.RecalculateModelParameters(problemData, rows);
        }

        //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
        var estimates = model.GetEstimatedClassValues(modifiableDataset, rows).ToList();
        return CalculateQuality(targetValues, estimates);
    } finally {
        // Always restore the original values, even if the evaluation throws,
        // so the caller's dataset is never left holding the replacement values.
        modifiableDataset.ReplaceVariable(variableName, originalValues);
    }
}
/// <summary>
/// Calculates the average classification penalty of <paramref name="model"/> over
/// <paramref name="rows"/>.
/// </summary>
/// <param name="model">Model that provides the estimated class values.</param>
/// <param name="problemData">Supplies the dataset, the target variable and the penalty for each (actual, estimated) class pair.</param>
/// <param name="rows">
/// Rows to evaluate. The sequence is enumerated by this method and is also handed to the model,
/// so it should be safe to enumerate more than once.
/// </param>
/// <returns>
/// The sum of all penalties divided by the number of rows, or <see cref="double.NaN"/> if the
/// model yields no estimates.
/// </returns>
public static double Calculate(IClassificationModel model, IClassificationProblemData problemData, IEnumerable<int> rows) {
    // Dispose the enumerator deterministically; it may wrap an iterator that holds resources.
    using (var estimations = model.GetEstimatedClassValues(problemData.Dataset, rows).GetEnumerator()) {
        if (!estimations.MoveNext()) return double.NaN;

        var penalty = 0.0;
        var count = 0;
        // Assumes the estimates are produced in the same order as rows - TODO confirm.
        foreach (var r in rows) {
            var actualClass = problemData.Dataset.GetDoubleValue(problemData.TargetVariable, r);
            penalty += problemData.GetClassificationPenalty(actualClass, estimations.Current);
            estimations.MoveNext();
            count++;
        }
        return penalty / count;
    }
}
/// <summary>
/// Computes the mean classification penalty the given model incurs on the selected rows.
/// </summary>
/// <param name="model">Classification model queried for estimated class values.</param>
/// <param name="problemData">Provides the dataset, the name of the target variable and the penalty lookup.</param>
/// <param name="rows">
/// Row indices to score. Passed to the model and iterated here as well, so the sequence is
/// consumed more than once.
/// </param>
/// <returns>
/// Accumulated penalty divided by the number of scored rows; <see cref="double.NaN"/> when the
/// model returns an empty sequence of estimates.
/// </returns>
public static double Calculate(IClassificationModel model, IClassificationProblemData problemData, IEnumerable<int> rows) {
    var dataset = problemData.Dataset;

    // The enumerator is released via using instead of being left undisposed.
    using (var estimations = model.GetEstimatedClassValues(dataset, rows).GetEnumerator()) {
        if (!estimations.MoveNext()) {
            return double.NaN;
        }

        var penalty = 0.0;
        var count = 0;
        // Estimates are consumed in lock-step with rows; presumably both share the same order - verify against the model.
        foreach (var r in rows) {
            var actualClass = dataset.GetDoubleValue(problemData.TargetVariable, r);
            penalty += problemData.GetClassificationPenalty(actualClass, estimations.Current);
            estimations.MoveNext();
            count++;
        }

        return penalty / count;
    }
}
/// <summary>
/// Calculates the impact of a single variable as the difference between the baseline quality
/// of the model and the quality obtained after the variable's values have been replaced.
/// </summary>
/// <param name="variableName">Variable whose impact is calculated.</param>
/// <param name="model">Model under evaluation.</param>
/// <param name="problemData">Used to verify that the variable exists and to read the target values when none are supplied.</param>
/// <param name="modifiableDataset">Dataset in which the variable is temporarily replaced.</param>
/// <param name="rows">Rows used for the evaluation.</param>
/// <param name="replacementMethod">Replacement strategy forwarded to <c>GetReplacementValues</c>.</param>
/// <param name="factorReplacementMethod">Factor replacement strategy forwarded to <c>GetReplacementValues</c>.</param>
/// <param name="targetValues">Target values for <paramref name="rows"/>; read from the problem data when <c>null</c>.</param>
/// <param name="quality">Baseline quality of the unmodified model; calculated here when it is <see cref="double.NaN"/>.</param>
/// <returns>
/// <c>0.0</c> if the model does not use the variable, otherwise the baseline quality minus the
/// quality measured with the replaced variable.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The model uses <paramref name="variableName"/>, but the dataset of
/// <paramref name="problemData"/> does not contain it.
/// </exception>
public static double CalculateImpact(
    string variableName,
    IClassificationModel model,
    IClassificationProblemData problemData,
    ModifiableDataset modifiableDataset,
    IEnumerable<int> rows,
    ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
    FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
    IEnumerable<double> targetValues = null,
    double quality = double.NaN) {
    // A variable the model never reads cannot influence its quality.
    if (!model.VariablesUsedForPrediction.Contains(variableName)) {
        return 0.0;
    }

    if (!problemData.Dataset.VariableNames.Contains(variableName)) {
        throw new InvalidOperationException(string.Format("Can not calculate variable impact, because the model uses inputs missing in the dataset ({0})", variableName));
    }

    if (targetValues == null) {
        targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
    }

    // NaN never compares equal to anything (including itself), so "quality == double.NaN"
    // is always false; double.IsNaN is required to detect the "not supplied" default.
    if (double.IsNaN(quality)) {
        // Same argument order as in CalculateQualityForReplacement, so the baseline and the
        // replacement quality are computed the same way.
        // NOTE(review): confirm the order against the CalculateQuality signature.
        quality = CalculateQuality(targetValues, model.GetEstimatedClassValues(modifiableDataset, rows));
    }

    IList originalValues;
    IList replacementValues = GetReplacementValues(modifiableDataset, variableName, model, rows, targetValues, out originalValues, replacementMethod, factorReplacementMethod);
    double newValue = CalculateQualityForReplacement(model, modifiableDataset, variableName, originalValues, rows, replacementValues, targetValues);

    return quality - newValue;
}