Beispiel #1
0
        /// <summary>
        /// Snapshots the current values of <paramref name="variableName"/> and computes replacement
        /// values for it, dispatching on whether the variable holds doubles or strings.
        /// </summary>
        /// <param name="modifiableDataset">Dataset that is queried for the variable's type and values.</param>
        /// <param name="variableName">Name of the variable to compute replacements for.</param>
        /// <param name="model">Model forwarded to the string replacement helper.</param>
        /// <param name="rows">Row indices forwarded to the type-specific helper.</param>
        /// <param name="targetValues">Target values forwarded to the string replacement helper.</param>
        /// <param name="originalValues">Receives a list copy of the variable's current values.</param>
        /// <param name="replacementMethod">Strategy forwarded to the double replacement helper.</param>
        /// <param name="factorReplacementMethod">Strategy forwarded to the string replacement helper.</param>
        /// <returns>The replacement values produced by the type-specific helper.</returns>
        /// <exception cref="NotSupportedException">The variable is neither of type double nor of type string.</exception>
        private static IList GetReplacementValues(ModifiableDataset modifiableDataset,
                                                  string variableName,
                                                  IRegressionModel model,
                                                  IEnumerable<int> rows,
                                                  IEnumerable<double> targetValues,
                                                  out IList originalValues,
                                                  ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
                                                  FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best)
        {
            // Numeric variable: keep a typed copy so no cast back from IList is needed.
            if (modifiableDataset.VariableHasType<double>(variableName))
            {
                List<double> numericSnapshot = modifiableDataset.GetReadOnlyDoubleValues(variableName).ToList();
                originalValues = numericSnapshot;
                return GetReplacementValuesForDouble(modifiableDataset, rows, numericSnapshot, replacementMethod);
            }

            // String (factor) variable.
            if (modifiableDataset.VariableHasType<string>(variableName))
            {
                List<string> factorSnapshot = modifiableDataset.GetReadOnlyStringValues(variableName).ToList();
                originalValues = factorSnapshot;
                return GetReplacementValuesForString(model, modifiableDataset, variableName, rows, factorSnapshot, targetValues, factorReplacementMethod);
            }

            throw new NotSupportedException("Variable not supported");
        }
Beispiel #2
0
        /// <summary>
        /// Temporarily replaces the string variable <paramref name="variable"/> in
        /// <paramref name="dataset"/> with <paramref name="replacementValues"/>, evaluates
        /// <paramref name="model"/> on <paramref name="rows"/>, and writes the original values back.
        /// </summary>
        /// <param name="model">Model whose estimated values are computed.</param>
        /// <param name="variable">Name of the string variable to replace.</param>
        /// <param name="dataset">Dataset that is modified for the duration of the evaluation.</param>
        /// <param name="rows">Row indices passed to the model for evaluation.</param>
        /// <param name="replacementValues">Values written into the variable's column while evaluating.</param>
        /// <returns>The fully materialized estimates produced while the replacement was in place.</returns>
        private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable,
                                                                             ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<string> replacementValues)
        {
            var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();

            dataset.ReplaceVariable(variable, replacementValues.ToList());
            try
            {
                //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
                return model.GetEstimatedValues(dataset, rows).ToList();
            }
            finally
            {
                // Restore the column even when the evaluation throws, so a failure
                // never leaves the caller's dataset holding the replacement values.
                dataset.ReplaceVariable(variable, originalValues);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Builds a replacement column for the string variable <paramref name="variable"/> according to
        /// <paramref name="replacement"/> and returns the model's estimates with that column in place.
        /// </summary>
        /// <param name="model">Model whose estimated values are computed.</param>
        /// <param name="variable">Name of the string variable to replace.</param>
        /// <param name="dataset">Dataset holding the variable; the replacement column has <c>dataset.Rows</c> entries.</param>
        /// <param name="rows">
        /// Row indices used both to derive the replacement values and for evaluation.
        /// The sequence is materialized once up front and the resulting list is reused.
        /// </param>
        /// <param name="replacement">
        /// <see cref="FactorReplacementMethodEnum.Mode"/>: every entry of the column becomes the most
        /// frequent value among <paramref name="rows"/>.
        /// <see cref="FactorReplacementMethodEnum.Shuffle"/>: the values of the selected rows are permuted
        /// among those rows; all other entries of the column are the empty string.
        /// </param>
        /// <returns>The estimates returned by the overload that takes explicit replacement values.</returns>
        /// <exception cref="ArgumentException"><paramref name="replacement"/> is neither Mode nor Shuffle.</exception>
        private static IEnumerable<double> EvaluateModelWithReplacedVariable(
            IRegressionModel model, string variable, ModifiableDataset dataset,
            IEnumerable<int> rows,
            FactorReplacementMethodEnum replacement = FactorReplacementMethodEnum.Shuffle)
        {
            // Materialize once: the indices are needed several times below and again for the
            // evaluation; re-enumerating a lazy sequence could yield inconsistent row sets.
            var rowList = rows.ToList();
            var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
            List<string> replacementValues;

            switch (replacement)
            {
                case FactorReplacementMethodEnum.Mode:
                    // First() throws InvalidOperationException when no rows are selected.
                    var mostCommonValue = rowList.Select(r => originalValues[r])
                                                 .GroupBy(v => v)
                                                 .OrderByDescending(g => g.Count())
                                                 .First().Key;
                    replacementValues = Enumerable.Repeat(mostCommonValue, dataset.Rows).ToList();
                    break;

                case FactorReplacementMethodEnum.Shuffle:
                    // new var has same empirical distribution but the relation to y is broken
                    // (fixed seed, so the permutation is the same on every call)
                    IRandom rand = new FastRandom(31415);
                    // prepare a complete column for the dataset
                    replacementValues = Enumerable.Repeat(string.Empty, dataset.Rows).ToList();
                    // shuffle only the selected rows
                    var shuffledValues = rowList.Select(r => originalValues[r]).Shuffle(rand).ToList();
                    // write the i-th shuffled value into the i-th selected row
                    for (int i = 0; i < rowList.Count; i++)
                    {
                        replacementValues[rowList[i]] = shuffledValues[i];
                    }
                    break;

                default:
                    throw new ArgumentException(string.Format("FactorReplacementMethod {0} cannot be handled.", replacement));
            }

            return EvaluateModelWithReplacedVariable(model, variable, dataset, rowList, replacementValues);
        }