コード例 #1
0
        /// <summary>
        /// Copies the current values of <paramref name="variableName"/> out of the dataset and asks the
        /// type-specific helper (double or string) for the values that should replace them.
        /// </summary>
        /// <param name="modifiableDataset">Dataset the variable is read from; also forwarded to the helper.</param>
        /// <param name="variableName">Name of the variable whose values are to be replaced.</param>
        /// <param name="model">Model forwarded to the string-variable helper.</param>
        /// <param name="rows">Row indices forwarded to the helper.</param>
        /// <param name="targetValues">Target values forwarded to the string-variable helper.</param>
        /// <param name="originalValues">Receives a list copy of the variable's current values.</param>
        /// <param name="replacementMethod">Replacement strategy used for double variables.</param>
        /// <param name="factorReplacementMethod">Replacement strategy used for string variables.</param>
        /// <returns>The replacement values produced by the type-specific helper.</returns>
        /// <exception cref="NotSupportedException">The variable is neither a double nor a string variable.</exception>
        private static IList GetReplacementValues(ModifiableDataset modifiableDataset,
                                                  string variableName,
                                                  IRegressionModel model,
                                                  IEnumerable<int> rows,
                                                  IEnumerable<double> targetValues,
                                                  out IList originalValues,
                                                  ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
                                                  FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best)
        {
            // Numeric variable: snapshot the column, then delegate to the double-specific helper.
            if (modifiableDataset.VariableHasType<double>(variableName))
            {
                var numericColumn = modifiableDataset.GetReadOnlyDoubleValues(variableName).ToList();
                originalValues = numericColumn;
                return GetReplacementValuesForDouble(modifiableDataset, rows, numericColumn, replacementMethod);
            }

            // Factor (string) variable: snapshot the column, then delegate to the string-specific helper.
            if (modifiableDataset.VariableHasType<string>(variableName))
            {
                var factorColumn = modifiableDataset.GetReadOnlyStringValues(variableName).ToList();
                originalValues = factorColumn;
                return GetReplacementValuesForString(model, modifiableDataset, variableName, rows, factorColumn, targetValues, factorReplacementMethod);
            }

            throw new NotSupportedException("Variable not supported");
        }
コード例 #2
0
        /// <summary>
        /// Builds a full-length replacement column for <paramref name="variable"/> using the chosen
        /// <paramref name="replacement"/> strategy and evaluates the model with that column by delegating
        /// to the overload that takes explicit replacement values.
        /// </summary>
        /// <param name="model">Regression model to evaluate.</param>
        /// <param name="variable">Name of the double variable to replace.</param>
        /// <param name="dataset">Dataset whose column is replaced for the evaluation.</param>
        /// <param name="rows">Row indices used to derive the replacement values and to evaluate the model.</param>
        /// <param name="replacement">
        /// Median/Average: every row of the column receives the statistic computed over <paramref name="rows"/>.
        /// Shuffle/Noise: only the entries at <paramref name="rows"/> are filled; all other entries are NaN.
        /// </param>
        /// <returns>The estimates returned by the delegated overload.</returns>
        /// <exception cref="ArgumentException">The replacement method is not one of the handled values.</exception>
        private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median)
        {
            var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();

            // Enumerate the caller's sequence exactly once: it is walked several times below, and a lazy or
            // non-repeatable sequence would otherwise be re-evaluated (or yield misaligned indices).
            // Typed as IEnumerable<int> so that overload resolution of the calls below is unchanged.
            IEnumerable<int> rowIndices = rows.ToList();

            double replacementValue;
            List<double> replacementValues;
            IRandom rand;

            switch (replacement)
            {
            case ReplacementMethodEnum.Median:
                replacementValue  = rowIndices.Select(r => originalValues[r]).Median();
                replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
                break;

            case ReplacementMethodEnum.Average:
                replacementValue  = rowIndices.Select(r => originalValues[r]).Average();
                replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
                break;

            case ReplacementMethodEnum.Shuffle:
                // new var has same empirical distribution but the relation to y is broken
                rand = new FastRandom(31415); // fixed seed keeps the permutation reproducible
                // prepare a complete column for the dataset
                replacementValues = Enumerable.Repeat(double.NaN, dataset.Rows).ToList();
                // shuffle only the selected rows
                var shuffledValues = rowIndices.Select(r => originalValues[r]).Shuffle(rand).ToList();
                int i = 0;
                // write the shuffled values back at the positions of the selected rows
                foreach (var r in rowIndices)
                {
                    replacementValues[r] = shuffledValues[i++];
                }
                break;

            case ReplacementMethodEnum.Noise:
                var avg    = rowIndices.Select(r => originalValues[r]).Average();
                var stdDev = rowIndices.Select(r => originalValues[r]).StandardDeviation();
                rand = new FastRandom(31415); // fixed seed keeps the noise reproducible
                // prepare a complete column for the dataset
                replacementValues = Enumerable.Repeat(double.NaN, dataset.Rows).ToList();
                // draw one normally distributed value per selected row
                foreach (var r in rowIndices)
                {
                    replacementValues[r] = NormalDistributedRandom.NextDouble(rand, avg, stdDev);
                }
                break;

            default:
                throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacement));
            }

            return EvaluateModelWithReplacedVariable(model, variable, dataset, rowIndices, replacementValues);
        }
コード例 #3
0
        /// <summary>
        /// Temporarily replaces the column <paramref name="variable"/> with <paramref name="replacementValues"/>,
        /// evaluates the model on <paramref name="rows"/>, and puts the original column back.
        /// </summary>
        /// <param name="model">Regression model to evaluate.</param>
        /// <param name="variable">Name of the double variable to replace.</param>
        /// <param name="dataset">Dataset that is modified for the duration of the evaluation.</param>
        /// <param name="rows">Row indices passed to the model for estimation.</param>
        /// <param name="replacementValues">Values installed as the temporary column.</param>
        /// <returns>The materialized estimates computed while the replacement column was in place.</returns>
        private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable,
                                                                              ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<double> replacementValues)
        {
            var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();

            dataset.ReplaceVariable(variable, replacementValues.ToList());
            try
            {
                //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
                return model.GetEstimatedValues(dataset, rows).ToList();
            }
            finally
            {
                // Restore the original column even when the model throws, so the caller's dataset
                // is never left holding the replacement values.
                dataset.ReplaceVariable(variable, originalValues);
            }
        }
        /// <summary>
        /// Temporarily replaces the column <paramref name="variable"/> with values derived by the chosen
        /// <paramref name="replacement"/> strategy, evaluates the model on <paramref name="rows"/>, and
        /// puts the original column back.
        /// </summary>
        /// <param name="model">Regression model to evaluate.</param>
        /// <param name="variable">Name of the double variable to replace.</param>
        /// <param name="dataset">Dataset that is modified for the duration of the evaluation.</param>
        /// <param name="rows">Row indices used to derive the replacement values and to evaluate the model.</param>
        /// <param name="replacement">
        /// Median/Average: every row of the column receives the statistic computed over <paramref name="rows"/>.
        /// Shuffle/Noise: only the entries at <paramref name="rows"/> are filled; all other entries are NaN.
        /// </param>
        /// <returns>The materialized estimates computed while the replacement column was in place.</returns>
        /// <exception cref="ArgumentException">The replacement method is not one of the handled values.</exception>
        private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median)
        {
            var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();

            // Enumerate the caller's sequence exactly once: it is walked several times below, and a lazy or
            // non-repeatable sequence would otherwise be re-evaluated (or yield misaligned indices).
            // Typed as IEnumerable<int> so that overload resolution of the calls below is unchanged.
            IEnumerable<int> rowIndices = rows.ToList();

            double replacementValue;
            List<double> replacementValues;
            IRandom rand;

            switch (replacement)
            {
            case ReplacementMethodEnum.Median:
                replacementValue  = rowIndices.Select(r => originalValues[r]).Median();
                replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
                break;

            case ReplacementMethodEnum.Average:
                replacementValue  = rowIndices.Select(r => originalValues[r]).Average();
                replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
                break;

            case ReplacementMethodEnum.Shuffle:
                // new var has same empirical distribution but the relation to y is broken
                rand = new FastRandom(31415); // fixed seed keeps the permutation reproducible
                // Build a complete, row-indexed column (as Median/Average do) instead of a list that only
                // has one entry per selected row; the latter is neither full-length nor aligned with the
                // row indices the model reads.
                replacementValues = Enumerable.Repeat(double.NaN, dataset.Rows).ToList();
                var shuffledValues = rowIndices.Select(r => originalValues[r]).Shuffle(rand).ToList();
                int i = 0;
                foreach (var r in rowIndices)
                {
                    replacementValues[r] = shuffledValues[i++];
                }
                break;

            case ReplacementMethodEnum.Noise:
                var avg    = rowIndices.Select(r => originalValues[r]).Average();
                var stdDev = rowIndices.Select(r => originalValues[r]).StandardDeviation();
                rand = new FastRandom(31415); // fixed seed keeps the noise reproducible
                // Complete, row-indexed column; one normally distributed draw per selected row,
                // in the same order as before.
                replacementValues = Enumerable.Repeat(double.NaN, dataset.Rows).ToList();
                foreach (var r in rowIndices)
                {
                    replacementValues[r] = NormalDistributedRandom.NextDouble(rand, avg, stdDev);
                }
                break;

            default:
                throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacement));
            }

            dataset.ReplaceVariable(variable, replacementValues);
            try
            {
                //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
                return model.GetEstimatedValues(dataset, rowIndices).ToList();
            }
            finally
            {
                // Restore the original column even when the model throws, so the caller's dataset
                // is never left holding the replacement values.
                dataset.ReplaceVariable(variable, originalValues);
            }
        }