private static double CalculateQualityForReplacement( IClassificationModel model, ModifiableDataset modifiableDataset, string variableName, IList originalValues, IEnumerable <int> rows, IList replacementValues, IEnumerable <double> targetValues) { modifiableDataset.ReplaceVariable(variableName, replacementValues); var discModel = model as IDiscriminantFunctionClassificationModel; if (discModel != null) { var problemData = new ClassificationProblemData(modifiableDataset, modifiableDataset.VariableNames, model.TargetVariable); discModel.RecalculateModelParameters(problemData, rows); } //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements var estimates = model.GetEstimatedClassValues(modifiableDataset, rows).ToList(); var ret = CalculateQuality(targetValues, estimates); modifiableDataset.ReplaceVariable(variableName, originalValues); return(ret); }
private static IEnumerable <double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable <int> rows, IEnumerable <string> replacementValues) { var originalValues = dataset.GetReadOnlyStringValues(variable).ToList(); dataset.ReplaceVariable(variable, replacementValues.ToList()); //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements var estimates = model.GetEstimatedValues(dataset, rows).ToList(); dataset.ReplaceVariable(variable, originalValues); return(estimates); }
private static double CalculateQualityForReplacement( IRegressionModel model, ModifiableDataset modifiableDataset, string variableName, IList originalValues, IEnumerable <int> rows, IList replacementValues, IEnumerable <double> targetValues) { modifiableDataset.ReplaceVariable(variableName, replacementValues); //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements var estimates = model.GetEstimatedValues(modifiableDataset, rows).ToList(); var ret = CalculateQuality(targetValues, estimates); modifiableDataset.ReplaceVariable(variableName, originalValues); return(ret); }
private static IEnumerable <double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable <int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) { var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList(); double replacementValue; List <double> replacementValues; IRandom rand; switch (replacement) { case ReplacementMethodEnum.Median: replacementValue = rows.Select(r => originalValues[r]).Median(); replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList(); break; case ReplacementMethodEnum.Average: replacementValue = rows.Select(r => originalValues[r]).Average(); replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList(); break; case ReplacementMethodEnum.Shuffle: // new var has same empirical distribution but the relation to y is broken rand = new FastRandom(31415); replacementValues = rows.Select(r => originalValues[r]).Shuffle(rand).ToList(); break; case ReplacementMethodEnum.Noise: var avg = rows.Select(r => originalValues[r]).Average(); var stdDev = rows.Select(r => originalValues[r]).StandardDeviation(); rand = new FastRandom(31415); replacementValues = rows.Select(_ => NormalDistributedRandom.NextDouble(rand, avg, stdDev)).ToList(); break; default: throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacement)); } dataset.ReplaceVariable(variable, replacementValues); //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements var estimates = model.GetEstimatedValues(dataset, rows).ToList(); dataset.ReplaceVariable(variable, originalValues); return(estimates); }