/// <summary>
/// Temporarily replaces the values of <paramref name="variableName"/> in
/// <paramref name="modifiableDataset"/>, evaluates the classification model on the modified data,
/// and returns the quality computed by <c>CalculateQuality</c>.
/// </summary>
/// <param name="model">
/// Classification model to evaluate. If it also implements
/// <c>IDiscriminantFunctionClassificationModel</c>, its parameters are recalculated on the
/// modified data before the estimates are computed.
/// </param>
/// <param name="modifiableDataset">Dataset that is modified in place and restored before returning.</param>
/// <param name="variableName">Name of the variable whose values are replaced.</param>
/// <param name="originalValues">Values written back to the variable once the evaluation is done.</param>
/// <param name="rows">Rows on which the model is evaluated.</param>
/// <param name="replacementValues">Values written to the variable for the duration of the evaluation.</param>
/// <param name="targetValues">Target values passed to <c>CalculateQuality</c> together with the estimates.</param>
/// <returns>The result of <c>CalculateQuality</c> for the target values and the estimates on the modified data.</returns>
private static double CalculateQualityForReplacement(
            IClassificationModel model,
            ModifiableDataset modifiableDataset,
            string variableName,
            IList originalValues,
            IEnumerable<int> rows,
            IList replacementValues,
            IEnumerable<double> targetValues)
        {
            modifiableDataset.ReplaceVariable(variableName, replacementValues);

            try
            {
                var discModel = model as IDiscriminantFunctionClassificationModel;

                if (discModel != null)
                {
                    var problemData = new ClassificationProblemData(modifiableDataset, modifiableDataset.VariableNames, model.TargetVariable);
                    // NOTE(review): the parameters recalculated here are not reverted when the dataset is
                    // restored below - confirm that callers expect the model to keep them.
                    discModel.RecalculateModelParameters(problemData, rows);
                }

                //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
                var estimates = model.GetEstimatedClassValues(modifiableDataset, rows).ToList();

                return CalculateQuality(targetValues, estimates);
            }
            finally
            {
                // Always put the original values back, even when the evaluation throws, so the
                // caller's dataset is never left holding the replacement values.
                modifiableDataset.ReplaceVariable(variableName, originalValues);
            }
        }
Example #2
0
        /// <summary>
        /// Evaluates the regression model on <paramref name="rows"/> while the string variable
        /// <paramref name="variable"/> temporarily holds <paramref name="replacementValues"/>.
        /// </summary>
        /// <param name="model">Regression model to evaluate.</param>
        /// <param name="variable">Name of the string variable whose values are replaced.</param>
        /// <param name="dataset">Dataset that is modified in place and restored before returning.</param>
        /// <param name="rows">Rows on which the model is evaluated.</param>
        /// <param name="replacementValues">Values written to the variable for the duration of the evaluation.</param>
        /// <returns>The materialized model estimates computed on the modified dataset.</returns>
        private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable,
                                                                             ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<string> replacementValues)
        {
            // Snapshot the current values so they can be written back afterwards.
            var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();

            dataset.ReplaceVariable(variable, replacementValues.ToList());

            try
            {
                //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
                return model.GetEstimatedValues(dataset, rows).ToList();
            }
            finally
            {
                // Restore the dataset even when the model evaluation throws.
                dataset.ReplaceVariable(variable, originalValues);
            }
        }
Example #3
0
        /// <summary>
        /// Temporarily replaces the values of <paramref name="variableName"/> in
        /// <paramref name="modifiableDataset"/>, evaluates the regression model on the modified data,
        /// and returns the quality computed by <c>CalculateQuality</c>.
        /// </summary>
        /// <param name="model">Regression model to evaluate.</param>
        /// <param name="modifiableDataset">Dataset that is modified in place and restored before returning.</param>
        /// <param name="variableName">Name of the variable whose values are replaced.</param>
        /// <param name="originalValues">Values written back to the variable once the evaluation is done.</param>
        /// <param name="rows">Rows on which the model is evaluated.</param>
        /// <param name="replacementValues">Values written to the variable for the duration of the evaluation.</param>
        /// <param name="targetValues">Target values passed to <c>CalculateQuality</c> together with the estimates.</param>
        /// <returns>The result of <c>CalculateQuality</c> for the target values and the estimates on the modified data.</returns>
        private static double CalculateQualityForReplacement(
            IRegressionModel model,
            ModifiableDataset modifiableDataset,
            string variableName,
            IList originalValues,
            IEnumerable<int> rows,
            IList replacementValues,
            IEnumerable<double> targetValues)
        {
            modifiableDataset.ReplaceVariable(variableName, replacementValues);

            try
            {
                //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
                var estimates = model.GetEstimatedValues(modifiableDataset, rows).ToList();

                return CalculateQuality(targetValues, estimates);
            }
            finally
            {
                // Always put the original values back, even when the evaluation throws, so the
                // caller's dataset is never left holding the replacement values.
                modifiableDataset.ReplaceVariable(variableName, originalValues);
            }
        }
        /// <summary>
        /// Evaluates the regression model on <paramref name="rows"/> while the double variable
        /// <paramref name="variable"/> is temporarily replaced according to <paramref name="replacement"/>.
        /// </summary>
        /// <param name="model">Regression model to evaluate.</param>
        /// <param name="variable">Name of the double variable whose values are replaced.</param>
        /// <param name="dataset">Dataset that is modified in place and restored before returning.</param>
        /// <param name="rows">
        /// Row indices used both to derive the replacement values and to evaluate the model.
        /// Enumerated exactly once.
        /// </param>
        /// <param name="replacement">
        /// Replacement strategy: a constant median or average of the selected rows, a shuffle of the
        /// selected rows' values, or normally distributed noise with the selected rows' mean and
        /// standard deviation.
        /// </param>
        /// <returns>The materialized model estimates computed on the modified dataset.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="replacement"/> is not a handled method.</exception>
        private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median)
        {
            var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();

            // Materialize once: the row indices are needed to read the selected values, to place the
            // replacement values, and to evaluate the model, and all three must see the same sequence.
            var rowList = rows.ToList();
            IEnumerable<double> selectedValues = rowList.Select(r => originalValues[r]).ToList();

            List<double> replacementValues;
            IRandom      rand;

            switch (replacement)
            {
            case ReplacementMethodEnum.Median:
                replacementValues = Enumerable.Repeat(selectedValues.Median(), dataset.Rows).ToList();
                break;

            case ReplacementMethodEnum.Average:
                replacementValues = Enumerable.Repeat(selectedValues.Average(), dataset.Rows).ToList();
                break;

            case ReplacementMethodEnum.Shuffle:
                // new var has same empirical distribution but the relation to y is broken
                rand = new FastRandom(31415);
                var shuffledValues = selectedValues.Shuffle(rand).ToList();

                // Build a full-length column (like the Median/Average branches do) and write the
                // shuffled values at the selected row indices. A list that only has one entry per
                // selected row would be misaligned with the row indices the model reads.
                replacementValues = new List<double>(originalValues);
                for (int i = 0; i < rowList.Count; i++)
                {
                    replacementValues[rowList[i]] = shuffledValues[i];
                }
                break;

            case ReplacementMethodEnum.Noise:
                var avg    = selectedValues.Average();
                var stdDev = selectedValues.StandardDeviation();
                rand = new FastRandom(31415);

                // Same alignment concern as for Shuffle: keep a full-length column and overwrite
                // only the selected rows, drawing one noise value per selected row in row order.
                replacementValues = new List<double>(originalValues);
                foreach (var r in rowList)
                {
                    replacementValues[r] = NormalDistributedRandom.NextDouble(rand, avg, stdDev);
                }
                break;

            default:
                throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacement));
            }

            dataset.ReplaceVariable(variable, replacementValues);

            try
            {
                //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
                return model.GetEstimatedValues(dataset, rowList).ToList();
            }
            finally
            {
                // Restore the dataset even when the model evaluation throws.
                dataset.ReplaceVariable(variable, originalValues);
            }
        }