public override IClusteringProblemData ImportData(string path) {
      var csvFileParser = new TableFileParser();
      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
      if (trainingIndizes.Count() >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
            allowedInputVars.Add(variableName);
        }
      } else {
        allowedInputVars.AddRange(dataset.DoubleVariables);
      }

      ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);

      int trainingPartEnd = trainingIndizes.Last();
      clusteringData.TrainingPartition.Start = trainingIndizes.First();
      clusteringData.TrainingPartition.End = trainingPartEnd;
      clusteringData.TestPartition.Start = trainingPartEnd;
      clusteringData.TestPartition.End = csvFileParser.Rows;

      clusteringData.Name = Path.GetFileName(path);

      return clusteringData;
    }
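    // Usage sketch for the ImportData(string) overload above (the provider type name
    // is assumed for illustration; substitute the concrete instance provider this
    // method belongs to):
    //
    //   var provider = new ClusteringCSVInstanceProvider();
    //   IClusteringProblemData data = provider.ImportData(@"C:\data\customers.csv");
    //   // the first two thirds of the rows form the training partition, the rest the test partition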
    public override IRegressionProblemData LoadData(IDataDescriptor id) {
      var descriptor = (ResourceRegressionDataDescriptor)id;

      var instanceArchiveName = GetResourceName(FileName + @"\.zip");
      using (var instancesZipFile = new ZipArchive(GetType().Assembly.GetManifestResourceStream(instanceArchiveName), ZipArchiveMode.Read)) {
        var entry = instancesZipFile.GetEntry(descriptor.ResourceName);
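        // two-pass read of the zipped resource: the first pass only detects the number
        // format, date format and column separator, the second pass parses the values
        // with the detected settings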
        NumberFormatInfo numberFormat;
        DateTimeFormatInfo dateFormat;
        char separator;
        using (Stream stream = entry.Open()) {
          TableFileParser.DetermineFileFormat(stream, out numberFormat, out dateFormat, out separator);
        }

        TableFileParser csvFileParser = new TableFileParser();
        using (Stream stream = entry.Open()) {
          csvFileParser.Parse(stream, numberFormat, dateFormat, separator, true);
        }

        Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
        if (!descriptor.CheckVariableNames(csvFileParser.VariableNames)) {
          throw new ArgumentException("Parsed file contains variables which are not in the descriptor.");
        }

        return descriptor.GenerateRegressionData(dataset);
      }
    }
    public override IRegressionProblemData ImportData(string path) {
      TableFileParser csvFileParser = new TableFileParser();
      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
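      // by convention, the last double variable in the dataset is used as the regression target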
      string targetVar = dataset.DoubleVariables.Last();

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
      if (trainingIndizes.Count() >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
            variableName != targetVar)
            allowedInputVars.Add(variableName);
        }
      } else {
        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
      }

      IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);

      var trainingPartEnd = trainingIndizes.Last();
      regressionData.TrainingPartition.Start = trainingIndizes.First();
      regressionData.TrainingPartition.End = trainingPartEnd;
      regressionData.TestPartition.Start = trainingPartEnd;
      regressionData.TestPartition.End = csvFileParser.Rows;

      regressionData.Name = Path.GetFileName(path);

      return regressionData;
    }
    public override ITimeSeriesPrognosisProblemData ImportData(string path) {
      TableFileParser csvFileParser = new TableFileParser();
      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
      string targetVar = csvFileParser.VariableNames.Last();

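      // all double variables except the target become inputs; unlike the importers
      // above, this overload does not remove constant columns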
      IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));

      ITimeSeriesPrognosisProblemData timeSeriesPrognosisData = new TimeSeriesPrognosisProblemData(dataset, allowedInputVars, targetVar);

      int trainingPartEnd = csvFileParser.Rows * 2 / 3;
      timeSeriesPrognosisData.TrainingPartition.Start = 0;
      timeSeriesPrognosisData.TrainingPartition.End = trainingPartEnd;
      timeSeriesPrognosisData.TestPartition.Start = trainingPartEnd;
      timeSeriesPrognosisData.TestPartition.End = csvFileParser.Rows;

      timeSeriesPrognosisData.Name = Path.GetFileName(path);
      return timeSeriesPrognosisData;
    }
    protected override ITimeSeriesPrognosisProblemData ImportData(string path, TimeSeriesPrognosisImportType type, TableFileParser csvFileParser) {
      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      int trainingPartEnd = (csvFileParser.Rows * type.TrainingPercentage) / 100;
      trainingPartEnd = trainingPartEnd > 0 ? trainingPartEnd : 1; // keep at least one training row for very small files or percentages
      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
      if (trainingIndizes.Count() >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
            variableName != type.TargetVariable)
            allowedInputVars.Add(variableName);
        }
      } else {
        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
      }

      TimeSeriesPrognosisProblemData timeSeriesPrognosisData = new TimeSeriesPrognosisProblemData(dataset, allowedInputVars, type.TargetVariable);

      timeSeriesPrognosisData.TrainingPartition.Start = 0;
      timeSeriesPrognosisData.TrainingPartition.End = trainingPartEnd;
      timeSeriesPrognosisData.TestPartition.Start = trainingPartEnd;
      timeSeriesPrognosisData.TestPartition.End = csvFileParser.Rows;

      timeSeriesPrognosisData.Name = Path.GetFileName(path);

      return timeSeriesPrognosisData;
    }
    protected override IClusteringProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser) {
      List<IList> values = csvFileParser.Values;
      if (type.Shuffle) {
        values = Shuffle(values);
      }

      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      int trainingPartEnd = (csvFileParser.Rows * type.TrainingPercentage) / 100;
      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
      if (trainingIndizes.Count() >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
            allowedInputVars.Add(variableName);
        }
      } else {
        allowedInputVars.AddRange(dataset.DoubleVariables);
      }

      ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);

      clusteringData.TrainingPartition.Start = 0;
      clusteringData.TrainingPartition.End = trainingPartEnd;
      clusteringData.TestPartition.Start = trainingPartEnd;
      clusteringData.TestPartition.End = csvFileParser.Rows;

      clusteringData.Name = Path.GetFileName(path);

      return clusteringData;
    }
    // base version of the typed CSV import; problem-specific providers override it
    // with the typed overloads in this file
    protected virtual TData ImportData(string path, ImportType type, TableFileParser csvFileParser) {
      throw new NotSupportedException();
    }
    protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser) {
      int trainingPartEnd = (csvFileParser.Rows * type.TrainingPercentage) / 100;
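      // e.g. 300 rows with TrainingPercentage = 66 give trainingPartEnd = 198: the first
      // 198 rows form the training partition, the remaining 102 rows the test partition
      // (the uniform class distribution branch below may recompute trainingPartEnd)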
      List<IList> values = csvFileParser.Values;
      if (type.Shuffle) {
        values = Shuffle(values);
        if (type.UniformlyDistributeClasses) {
          values = Shuffle(values, csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable)),
                           type.TrainingPercentage, out trainingPartEnd);
        }
      }

      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
      if (trainingIndizes.Count() >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
            variableName != type.TargetVariable)
            allowedInputVars.Add(variableName);
        }
      } else {
        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
      }

      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable);

      classificationData.TrainingPartition.Start = 0;
      classificationData.TrainingPartition.End = trainingPartEnd;
      classificationData.TestPartition.Start = trainingPartEnd;
      classificationData.TestPartition.End = csvFileParser.Rows;

      classificationData.Name = Path.GetFileName(path);

      return classificationData;
    }