Example #1
0
        /// <summary>
        /// Builds a classification problem from already-parsed tabular data: optionally shuffles the rows,
        /// splits them into a training and a test partition, and selects the allowed input variables.
        /// </summary>
        /// <param name="path">Path of the imported file; only its file name is used, as the name of the returned problem data.</param>
        /// <param name="type">Import settings; this method reads Shuffle, UniformlyDistributeClasses, TrainingPercentage and TargetVariable.</param>
        /// <param name="csvFileParser">Parser that supplies the row count, the variable names and the values.</param>
        /// <returns>
        /// Problem data whose training partition is Start = 0 / End = trainingPartEnd and whose test partition is
        /// Start = trainingPartEnd / End = csvFileParser.Rows.
        /// </returns>
        protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser)
        {
            // Multiply in 64-bit: Rows * TrainingPercentage evaluated as int silently overflows for very large
            // files (roughly 21 million rows at 100 %), which would yield a wrong or negative partition end.
            int          trainingPartEnd = (int)((long)csvFileParser.Rows * type.TrainingPercentage / 100);
            List <IList> values          = csvFileParser.Values;

            if (type.Shuffle)
            {
                values = Shuffle(values);
                if (type.UniformlyDistributeClasses)
                {
                    // This Shuffle overload receives the target column index and the training percentage and
                    // reports its own partition end through the out parameter, replacing the value computed above.
                    int targetIndex = csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable));
                    values = Shuffle(values, targetIndex, type.TrainingPercentage, out trainingPartEnd);
                }
            }

            Dataset dataset = new Dataset(csvFileParser.VariableNames, values);

            // Turn off input variables that are constant in the training partition.
            var allowedInputVars = new List <string>();
            var trainingIndizes  = Enumerable.Range(0, trainingPartEnd);

            // The range holds exactly trainingPartEnd indices, so compare the count directly instead of enumerating it.
            if (trainingPartEnd >= 2)
            {
                foreach (var variableName in dataset.DoubleVariables)
                {
                    // Cheap name check first so the target column is never scanned.
                    if (variableName != type.TargetVariable &&
                        dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
                    {
                        allowedInputVars.Add(variableName);
                    }
                }
            }
            else
            {
                // With fewer than two training rows the constancy check is skipped:
                // every double variable except the target is allowed.
                allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
            }

            ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable);

            classificationData.TrainingPartition.Start = 0;
            classificationData.TrainingPartition.End   = trainingPartEnd;
            classificationData.TestPartition.Start     = trainingPartEnd;
            classificationData.TestPartition.End       = csvFileParser.Rows;

            classificationData.Name = Path.GetFileName(path);

            return classificationData;
        }
    /// <summary>
    /// Creates classification problem data from parsed table values, optionally shuffling the rows first,
    /// then partitioning them into training and test ranges and choosing the allowed input variables.
    /// </summary>
    /// <param name="path">Source file path; its file name becomes the name of the problem data.</param>
    /// <param name="type">Import options (Shuffle, UniformlyDistributeClasses, TrainingPercentage, TargetVariable).</param>
    /// <param name="csvFileParser">Parser providing Rows, VariableNames and Values.</param>
    /// <returns>The configured problem data with training partition 0..trainingPartEnd and test partition trainingPartEnd..Rows.</returns>
    protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser) {
      // Widen to long before multiplying; the int product Rows * TrainingPercentage overflows
      // silently once it exceeds int.MaxValue and would corrupt the partition boundary.
      long scaledRows = (long)csvFileParser.Rows * type.TrainingPercentage;
      int trainingPartEnd = (int)(scaledRows / 100);

      List<IList> values = csvFileParser.Values;
      if (type.Shuffle) {
        values = Shuffle(values);
        if (type.UniformlyDistributeClasses) {
          // The second shuffle is given the target column index and overwrites trainingPartEnd via its out parameter.
          values = Shuffle(values, csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable)),
                           type.TrainingPercentage, out trainingPartEnd);
        }
      }

      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);

      // Turn off input variables that are constant in the training partition.
      var allowedInputVars = new List<string>();
      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
      // Enumerable.Range(0, n) has exactly n elements, so no need to enumerate it just to count.
      if (trainingPartEnd >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          // Skip the target before touching its values.
          if (variableName == type.TargetVariable) {
            continue;
          }
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0) {
            allowedInputVars.Add(variableName);
          }
        }
      } else {
        // Fewer than two training rows: no constancy check, allow every double variable except the target.
        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
      }

      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable);

      classificationData.TrainingPartition.Start = 0;
      classificationData.TrainingPartition.End = trainingPartEnd;
      classificationData.TestPartition.Start = trainingPartEnd;
      classificationData.TestPartition.End = csvFileParser.Rows;

      classificationData.Name = Path.GetFileName(path);

      return classificationData;
    }