public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData)
        {
            // Copies target variable, class names, positive class and the penalty
            // matrix from the given problem data into this instance's parameters.
            // Throws ArgumentNullException when problemData is null and
            // ArgumentException when it is not a ClassificationProblemData.
            if (problemData == null)
            {
                throw new ArgumentNullException("problemData", "The provided problemData is null.");
            }
            ClassificationProblemData classificationProblemData = problemData as ClassificationProblemData;

            if (classificationProblemData == null)
            {
                throw new ArgumentException("The problem data is not a classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
            }

            base.AdjustProblemDataProperties(problemData);
            TargetVariable = classificationProblemData.TargetVariable;

            // Materialize once: the original called Count() and ElementAt(i) on a lazy
            // IEnumerable inside the loop, which is O(n^2) and re-evaluates the sequence.
            var classNames = classificationProblemData.ClassNames.ToList();
            for (int i = 0; i < classNames.Count; i++)
            {
                ClassNamesParameter.Value[i, 0] = classNames[i];
            }

            PositiveClass = classificationProblemData.PositiveClass;

            // Copy the full Classes x Classes misclassification penalty matrix.
            for (int i = 0; i < Classes; i++)
            {
                for (int j = 0; j < Classes; j++)
                {
                    ClassificationPenaltiesParameter.Value[i, j] = classificationProblemData.GetClassificationPenalty(ClassValuesCache[i], ClassValuesCache[j]);
                }
            }
        }
        // Static initializer: builds the default (Wisconsin) demo dataset and the
        // shared "empty" placeholder problem data used before real data is loaded.
        static ClassificationProblemData()
        {
            // Default demo dataset.
            defaultDataset             = new Dataset(defaultVariableNames, defaultData);
            defaultDataset.Name        = "Wisconsin classification problem";
            defaultDataset.Description = "subset from to ..";

            defaultTargetVariable        = "class";
            defaultAllowedInputVariables = defaultVariableNames.Except(new List <string>()
            {
                "sample", "class"
            });

            // Placeholder instance: all regular parameters are removed and replaced
            // with empty-valued ones so the object is inert until real data arrives.
            var placeholder = new ClassificationProblemData();

            placeholder.Parameters.Clear();
            placeholder.Name        = "Empty Classification ProblemData";
            placeholder.Description = "This ProblemData acts as place holder before the correct problem data is loaded.";
            placeholder.isEmpty     = true;

            placeholder.Parameters.Add(new FixedValueParameter <Dataset>(DatasetParameterName, "", new Dataset()));
            placeholder.Parameters.Add(new FixedValueParameter <ReadOnlyCheckedItemList <StringValue> >(InputVariablesParameterName, ""));
            placeholder.Parameters.Add(new FixedValueParameter <IntRange>(TrainingPartitionParameterName, "", (IntRange) new IntRange(0, 0).AsReadOnly()));
            placeholder.Parameters.Add(new FixedValueParameter <IntRange>(TestPartitionParameterName, "", (IntRange) new IntRange(0, 0).AsReadOnly()));
            placeholder.Parameters.Add(new ConstrainedValueParameter <StringValue>(TargetVariableParameterName, new ItemSet <StringValue>()));
            placeholder.Parameters.Add(new FixedValueParameter <StringMatrix>(ClassNamesParameterName, "", new StringMatrix(0, 0).AsReadOnly()));
            placeholder.Parameters.Add(new FixedValueParameter <DoubleMatrix>(ClassificationPenaltiesParameterName, "", (DoubleMatrix) new DoubleMatrix(0, 0).AsReadOnly()));
            emptyProblemData = placeholder;
        }
        // Temporarily swaps the named variable to replacementValues, evaluates the
        // model's quality on the given rows, then restores originalValues.
        // Discriminant-function models are refit on the modified data first.
        private static double CalculateQualityForReplacement(
            IClassificationModel model,
            ModifiableDataset modifiableDataset,
            string variableName,
            IList originalValues,
            IEnumerable <int> rows,
            IList replacementValues,
            IEnumerable <double> targetValues)
        {
            modifiableDataset.ReplaceVariable(variableName, replacementValues);

            var discriminantModel = model as IDiscriminantFunctionClassificationModel;
            if (discriminantModel != null)
            {
                // Thresholds depend on the data, so recalculate them for the altered column.
                var tempProblemData = new ClassificationProblemData(modifiableDataset, modifiableDataset.VariableNames, model.TargetVariable);
                discriminantModel.RecalculateModelParameters(tempProblemData, rows);
            }

            // ToList forces evaluation now, before the variable is swapped back below;
            // lazy evaluation would otherwise produce estimates from restored data.
            List<double> estimatedClassValues = model.GetEstimatedClassValues(modifiableDataset, rows).ToList();
            double quality = CalculateQuality(targetValues, estimatedClassValues);

            modifiableDataset.ReplaceVariable(variableName, originalValues);

            return quality;
        }
    // Loads a CSV file and builds classification problem data from it. The last
    // double variable becomes the target; the first two thirds of the rows form
    // the training partition and the remainder the test partition.
    public override IClassificationProblemData ImportData(string path) {
      TableFileParser csvFileParser = new TableFileParser();

      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
      string targetVar = dataset.DoubleVariables.Last();

      // Training rows are [0, trainingPartEnd). Materialize the range once: the
      // original kept a deferred Enumerable.Range and re-enumerated it per variable,
      // and called Last() on it, which throws on files with fewer than 2 rows.
      int trainingPartEnd = (csvFileParser.Rows * 2) / 3;
      var trainingIndizes = Enumerable.Range(0, trainingPartEnd).ToList();

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      if (trainingIndizes.Count >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
            variableName != targetVar)
            allowedInputVars.Add(variableName);
        }
      } else {
        // Too few training rows to detect constants: allow all non-target doubles.
        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
      }

      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);

      // Use the exclusive end index for the partition boundary. The original used
      // trainingIndizes.Last(), which dropped the final training row (off-by-one,
      // inconsistent with the other ImportData overload) and crashed on tiny files.
      classificationData.TrainingPartition.Start = 0;
      classificationData.TrainingPartition.End = trainingPartEnd;
      classificationData.TestPartition.Start = trainingPartEnd;
      classificationData.TestPartition.End = csvFileParser.Rows;

      classificationData.Name = Path.GetFileName(path);

      return classificationData;
    }
// Example #5
// 0
 // Builds classification problem data over the exported dataset. The old target
 // variable is reused if it still exists in the current data; otherwise the first
 // variable is used. The positive class is carried over from the old problem data.
 private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) {
   var target = oldProblemData.TargetVariable;
   if (!context.Data.VariableNames.Contains(target))
     target = context.Data.VariableNames.First();
   var inputs = GetDoubleInputVariables(target);
   var result = new ClassificationProblemData(ExportedDataset, inputs, target, Transformations);
   result.PositiveClass = oldProblemData.PositiveClass;
   return result;
 }
 // Creates classification problem data for the given dataset, copying name,
 // description and the training/test partition boundaries from this instance.
 public IClassificationProblemData GenerateClassificationData(Dataset dataset) {
   var data = new ClassificationProblemData(dataset, AllowedInputVariables, TargetVariable);
   data.Name = Name;
   data.Description = Description;
   data.TrainingPartition.Start = TrainingPartitionStart;
   data.TrainingPartition.End = TrainingPartitionEnd;
   data.TestPartition.Start = TestPartitionStart;
   data.TestPartition.End = TestPartitionEnd;
   return data;
 }
 // Deep-copy constructor used by the cloning infrastructure: re-attaches parameter
 // event handlers and rebuilds the class-name cache from the cloned name matrix.
 protected ClassificationProblemData(ClassificationProblemData original, Cloner cloner)
     : base(original, cloner)
 {
     RegisterParameterEvents();
     var classNames = ClassNamesParameter.Value;
     classNamesCache = new List <string>(classNames.Rows);
     for (int row = 0; row < classNames.Rows; row++)
     {
         classNamesCache.Add(classNames[row, 0]);
     }
 }
        // Propagates a problem-data change to all member solutions: evaluation caches
        // are invalidated, non-ensemble members receive a fresh shared problem data
        // with the new partitions, and the cached per-solution partitions are updated.
        protected override void OnProblemDataChanged()
        {
            // All cached evaluation results refer to the old problem data.
            trainingEvaluationCache.Clear();
            testEvaluationCache.Clear();
            evaluationCache.Clear();

            IClassificationProblemData sharedProblemData =
                new ClassificationProblemData(ProblemData.Dataset,
                                              ProblemData.AllowedInputVariables,
                                              ProblemData.TargetVariable);
            sharedProblemData.TrainingPartition.Start = ProblemData.TrainingPartition.Start;
            sharedProblemData.TrainingPartition.End   = ProblemData.TrainingPartition.End;
            sharedProblemData.TestPartition.Start     = ProblemData.TestPartition.Start;
            sharedProblemData.TestPartition.End       = ProblemData.TestPartition.End;

            // Nested ensembles keep the original reference; plain solutions share the copy.
            foreach (var solution in ClassificationSolutions)
            {
                solution.ProblemData = solution is ClassificationEnsembleSolution
                    ? ProblemData
                    : sharedProblemData;
            }

            // Keep the per-solution partition copies in sync with the new data.
            foreach (var partition in trainingPartitions.Values)
            {
                partition.Start = ProblemData.TrainingPartition.Start;
                partition.End   = ProblemData.TrainingPartition.End;
            }
            foreach (var partition in testPartitions.Values)
            {
                partition.Start = ProblemData.TestPartition.Start;
                partition.End   = ProblemData.TestPartition.End;
            }

            base.OnProblemDataChanged();
        }
    // Runs the gradient boosted trees algorithm: iteratively adds trees minimizing
    // the configured loss, reports train/test loss and variable relevance, and
    // optionally produces a final regression or classification solution.
    protected override void Run(CancellationToken cancellationToken) {
      // Set up the algorithm
      if (SetSeedRandomly) Seed = new System.Random().Next();

      // Set up the results display
      var iterations = new IntValue(0);
      Results.Add(new Result("Iterations", iterations));

      var table = new DataTable("Qualities");
      table.Rows.Add(new DataRow("Loss (train)"));
      table.Rows.Add(new DataRow("Loss (test)"));
      Results.Add(new Result("Qualities", table));
      var curLoss = new DoubleValue();
      Results.Add(new Result("Loss (train)", curLoss));

      // init
      // Clone so the boosting state cannot mutate the problem's own data.
      var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
      var lossFunction = LossFunctionParameter.Value;
      var state = GradientBoostedTreesAlgorithmStatic.CreateGbmState(problemData, lossFunction, (uint)Seed, MaxSize, R, M, Nu);

      var updateInterval = UpdateIntervalParameter.Value.Value;
      // Loop until iteration limit reached or canceled.
      for (int i = 0; i < Iterations; i++) {
        cancellationToken.ThrowIfCancellationRequested();

        GradientBoostedTreesAlgorithmStatic.MakeStep(state);

        // iteration results
        // NOTE(review): updateInterval == 0 would throw DivideByZeroException here —
        // presumably the parameter is constrained to >= 1; verify at the parameter definition.
        if (i % updateInterval == 0) {
          curLoss.Value = state.GetTrainLoss();
          table.Rows["Loss (train)"].Values.Add(curLoss.Value);
          table.Rows["Loss (test)"].Values.Add(state.GetTestLoss());
          iterations.Value = i;
        }
      }

      // final results
      iterations.Value = Iterations;
      curLoss.Value = state.GetTrainLoss();
      table.Rows["Loss (train)"].Values.Add(curLoss.Value);
      table.Rows["Loss (test)"].Values.Add(state.GetTestLoss());

      // produce variable relevance
      var orderedImpacts = state.GetVariableRelevance().Select(t => new { name = t.Key, impact = t.Value }).ToList();

      var impacts = new DoubleMatrix();
      // Fill the matrix through its string-convertible view so row/column names can be set.
      var matrix = impacts as IStringConvertibleMatrix;
      matrix.Rows = orderedImpacts.Count;
      matrix.RowNames = orderedImpacts.Select(x => x.name);
      matrix.Columns = 1;
      matrix.ColumnNames = new string[] { "Relative variable relevance" };

      int rowIdx = 0;
      foreach (var p in orderedImpacts) {
        matrix.SetValue(string.Format("{0:N2}", p.impact), rowIdx++, 0);
      }

      Results.Add(new Result("Variable relevance", impacts));
      Results.Add(new Result("Loss (test)", new DoubleValue(state.GetTestLoss())));

      // produce solution 
      if (CreateSolution) {
        var model = state.GetModel();

        // for logistic regression we produce a classification solution
        if (lossFunction is LogisticRegressionLoss) {
          var classificationModel = new DiscriminantFunctionClassificationModel(model,
            new AccuracyMaximizationThresholdCalculator());
          var classificationProblemData = new ClassificationProblemData(problemData.Dataset,
            problemData.AllowedInputVariables, problemData.TargetVariable, problemData.Transformations);
          classificationModel.RecalculateModelParameters(classificationProblemData, classificationProblemData.TrainingIndices);

          var classificationSolution = new DiscriminantFunctionClassificationSolution(classificationModel, classificationProblemData);
          Results.Add(new Result("Solution", classificationSolution));
        } else {
          // otherwise we produce a regression solution
          Results.Add(new Result("Solution", new RegressionSolution(model, problemData)));
        }
      }
    }
    // Propagates a problem-data change to all member solutions: evaluation caches are
    // invalidated, non-ensemble members get a fresh shared problem data with the new
    // partitions, and the cached per-solution partition ranges are updated in place.
    protected override void OnProblemDataChanged() {
      trainingEvaluationCache.Clear();
      testEvaluationCache.Clear();
      evaluationCache.Clear();

      IClassificationProblemData shared = new ClassificationProblemData(
        ProblemData.Dataset, ProblemData.AllowedInputVariables, ProblemData.TargetVariable);
      shared.TrainingPartition.Start = ProblemData.TrainingPartition.Start;
      shared.TrainingPartition.End = ProblemData.TrainingPartition.End;
      shared.TestPartition.Start = ProblemData.TestPartition.Start;
      shared.TestPartition.End = ProblemData.TestPartition.End;

      // Nested ensembles keep the original reference; plain solutions share the copy.
      foreach (var solution in ClassificationSolutions) {
        solution.ProblemData = solution is ClassificationEnsembleSolution ? ProblemData : shared;
      }

      foreach (var partition in trainingPartitions.Values) {
        partition.Start = ProblemData.TrainingPartition.Start;
        partition.End = ProblemData.TrainingPartition.End;
      }
      foreach (var partition in testPartitions.Values) {
        partition.Start = ProblemData.TestPartition.Start;
        partition.End = ProblemData.TestPartition.End;
      }

      base.OnProblemDataChanged();
    }
// Example #11
// 0
 // Deep-copy constructor for the cloning infrastructure; the cloned parameters
 // need their change-event handlers re-registered on the new instance.
 protected ClassificationProblemData(ClassificationProblemData original, Cloner cloner)
     : base(original, cloner)
 {
     RegisterParameterEvents();
 }
    // Imports a CSV file as classification problem data according to the given
    // import settings (optional shuffling / class-uniform distribution) and splits
    // the rows into training and test partitions at type.TrainingPercentage.
    protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser) {
      int trainingPartEnd = (csvFileParser.Rows * type.TrainingPercentage) / 100;
      List<IList> values = csvFileParser.Values;
      if (type.Shuffle) {
        values = Shuffle(values);
        if (type.UniformlyDistributeClasses) {
          // This overload also recomputes the training/test split boundary.
          values = Shuffle(values, csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable)),
                           type.TrainingPercentage, out trainingPartEnd);
        }
      }

      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      // Materialize once: the original kept a deferred Enumerable.Range that was
      // re-enumerated by Count() and again for every variable below.
      var trainingIndizes = Enumerable.Range(0, trainingPartEnd).ToList();
      if (trainingIndizes.Count >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
            variableName != type.TargetVariable)
            allowedInputVars.Add(variableName);
        }
      } else {
        // Too few training rows to detect constants: allow all non-target doubles.
        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
      }

      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable);

      classificationData.TrainingPartition.Start = 0;
      classificationData.TrainingPartition.End = trainingPartEnd;
      classificationData.TestPartition.Start = trainingPartEnd;
      classificationData.TestPartition.End = csvFileParser.Rows;

      classificationData.Name = Path.GetFileName(path);

      return classificationData;
    }
// Example #13
// 0
 // Deep-copy constructor for cloning: re-registers parameter event handlers and
 // rebuilds the class-name cache from the cloned class-names matrix.
 protected ClassificationProblemData(ClassificationProblemData original, Cloner cloner)
   : base(original, cloner) {
   var classNames = ClassNamesParameter.Value;
   RegisterParameterEvents();
   classNamesCache = new List<string>(classNames.Rows);
   for (int row = 0; row < classNames.Rows; row++) {
     classNamesCache.Add(classNames[row, 0]);
   }
 }
// Example #14
// 0
    // Static initializer: builds the default (Wisconsin) demo dataset and the shared
    // "empty" placeholder problem data that stands in before real data is loaded.
    static ClassificationProblemData() {
      defaultDataset = new Dataset(defaultVariableNames, defaultData) {
        Name = "Wisconsin classification problem",
        Description = "subset from to .."
      };

      defaultTargetVariable = "class";
      defaultAllowedInputVariables = defaultVariableNames.Except(new List<string>() { "sample", "class" });

      // Placeholder instance: regular parameters are removed and replaced with
      // empty-valued ones so the object is inert until real data arrives.
      var placeholder = new ClassificationProblemData();
      placeholder.Parameters.Clear();
      placeholder.Name = "Empty Classification ProblemData";
      placeholder.Description = "This ProblemData acts as place holder before the correct problem data is loaded.";
      placeholder.isEmpty = true;

      placeholder.Parameters.Add(new FixedValueParameter<Dataset>(DatasetParameterName, "", new Dataset()));
      placeholder.Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>(InputVariablesParameterName, ""));
      placeholder.Parameters.Add(new FixedValueParameter<IntRange>(TrainingPartitionParameterName, "", (IntRange)new IntRange(0, 0).AsReadOnly()));
      placeholder.Parameters.Add(new FixedValueParameter<IntRange>(TestPartitionParameterName, "", (IntRange)new IntRange(0, 0).AsReadOnly()));
      placeholder.Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>()));
      placeholder.Parameters.Add(new FixedValueParameter<StringMatrix>(ClassNamesParameterName, "", new StringMatrix(0, 0).AsReadOnly()));
      placeholder.Parameters.Add(new FixedValueParameter<DoubleMatrix>(ClassificationPenaltiesParameterName, "", (DoubleMatrix)new DoubleMatrix(0, 0).AsReadOnly()));
      emptyProblemData = placeholder;
    }
 // Deep-copy constructor for the cloning infrastructure; the cloned parameters
 // need their change-event handlers re-registered on the new instance.
 protected ClassificationProblemData(ClassificationProblemData original, Cloner cloner)
   : base(original, cloner) {
   RegisterParameterEvents();
 }
 // Creates a new classification problem initialized with default problem data.
 public ClassificationProblem()
     : base()
 {
     ProblemData = new ClassificationProblemData();
 }