/// <summary>
/// Parses the table file at <paramref name="path"/> with the supplied CSV format settings
/// and forwards the populated parser to the parser-based <c>ImportData</c> overload.
/// </summary>
/// <param name="path">Path of the file to parse.</param>
/// <param name="type">Import options, passed through unchanged to the parser-based overload.</param>
/// <param name="csvFormat">Number format, date/time format, separator and header flag used for parsing.</param>
/// <returns>Whatever the parser-based <c>ImportData</c> overload returns.</returns>
public TData ImportData(string path, ImportType type, DataAnalysisCSVFormat csvFormat)
        {
            var parser = new TableFileParser();

            // Parse with the explicit format instead of auto-detecting it.
            parser.Parse(
                path,
                csvFormat.NumberFormatInfo,
                csvFormat.DateTimeFormatInfo,
                csvFormat.Separator,
                csvFormat.VariableNamesAvailable);

            return ImportData(path, type, parser);
        }
        /// <summary>
        /// Loads the regression instance described by <paramref name="id"/> from the zip archive
        /// that is embedded as a manifest resource in this type's assembly.
        /// </summary>
        /// <param name="id">Instance descriptor; it is cast to <c>ResourceRegressionDataDescriptor</c>.</param>
        /// <returns>The problem data produced by the descriptor's <c>GenerateRegressionData</c> for the parsed dataset.</returns>
        /// <exception cref="ArgumentException">
        /// The archive contains no entry with the descriptor's resource name, or the parsed
        /// variable names are rejected by the descriptor's <c>CheckVariableNames</c>.
        /// </exception>
        public override IRegressionProblemData LoadData(IDataDescriptor id)
        {
            var descriptor = (ResourceRegressionDataDescriptor)id;

            var instanceArchiveName = GetResourceName(FileName + @"\.zip");

            using (var instancesZipFile = new ZipArchive(GetType().Assembly.GetManifestResourceStream(instanceArchiveName), ZipArchiveMode.Read)) {
                var entry = instancesZipFile.GetEntry(descriptor.ResourceName);
                // GetEntry returns null for a missing entry; fail with a clear message instead of a NullReferenceException.
                if (entry == null)
                {
                    throw new ArgumentException("The instance archive does not contain an entry named '" + descriptor.ResourceName + "'.");
                }

                NumberFormatInfo   numberFormat;
                DateTimeFormatInfo dateFormat;
                char separator;
                // First pass over the entry: detect number format, date format and separator.
                using (Stream stream = entry.Open()) {
                    TableFileParser.DetermineFileFormat(stream, out numberFormat, out dateFormat, out separator);
                }

                // Second pass on a freshly opened stream: parse the data, treating the first line as variable names.
                TableFileParser csvFileParser = new TableFileParser();
                using (Stream stream = entry.Open()) {
                    csvFileParser.Parse(stream, numberFormat, dateFormat, separator, true);
                }

                Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
                if (!descriptor.CheckVariableNames(csvFileParser.VariableNames))
                {
                    throw new ArgumentException("Parsed file contains variables which are not in the descriptor.");
                }

                return(descriptor.GenerateRegressionData(dataset));
            }
        }
    /// <summary>
    /// Imports clustering problem data from the table file at <paramref name="path"/>.
    /// </summary>
    /// <remarks>
    /// Double variables that are constant over the first two thirds of the rows are excluded
    /// from the allowed inputs; if that range holds fewer than two rows, all double variables
    /// are allowed. Files with fewer than two rows yield an empty training partition instead
    /// of failing.
    /// </remarks>
    /// <param name="path">Path of the file to import; its file name becomes the name of the problem data.</param>
    /// <returns>The clustering problem data with training and test partitions set.</returns>
    public override IClusteringProblemData ImportData(string path) {
      var csvFileParser = new TableFileParser();
      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      int trainingRowCount = (csvFileParser.Rows * 2) / 3;
      var trainingIndizes = Enumerable.Range(0, trainingRowCount);
      if (trainingRowCount >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
            allowedInputVars.Add(variableName);
        }
      } else {
        allowedInputVars.AddRange(dataset.DoubleVariables);
      }

      ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);

      // The boundary is the last training index (as before). With an empty range there is no
      // last index, so fall back to 0 rather than calling Last() on an empty sequence.
      // NOTE(review): if partition End is exclusive, the last checked row ends up in the test partition - confirm intended.
      int trainingPartEnd = trainingRowCount > 0 ? trainingRowCount - 1 : 0;
      clusteringData.TrainingPartition.Start = 0;
      clusteringData.TrainingPartition.End = trainingPartEnd;
      clusteringData.TestPartition.Start = trainingPartEnd;
      clusteringData.TestPartition.End = csvFileParser.Rows;

      clusteringData.Name = Path.GetFileName(path);

      return clusteringData;
    }
        /// <summary>
        /// Imports log modelling problem data from the table file at <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// The case id, timestamp and activity variables are all initialised to the first variable
        /// of the dataset. The partition boundary is derived from the first two thirds of the rows;
        /// files with fewer than two rows yield an empty training partition instead of failing.
        /// </remarks>
        /// <param name="path">Path of the file to import; its file name becomes the name of the problem data.</param>
        /// <returns>The log modelling problem data with training and test partitions set.</returns>
        public override ILogModellingProblemData ImportData(string path)
        {
            var csvFileParser = new TableFileParser();

            csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

            var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

            // NOTE(review): all three roles point at the first column - presumably a placeholder
            // default that is adjusted later; confirm.
            string caseIDVar    = dataset.VariableNames.First();
            string timestampVar = dataset.VariableNames.First();
            string activityVar  = dataset.VariableNames.First();


            ILogModellingProblemData logData = new LogModellingProblemData(dataset, caseIDVar, timestampVar, activityVar);

            // The boundary is the last index of the first two thirds of the rows (as before).
            // With fewer than two rows that range is empty, so fall back to 0 instead of
            // calling Last()/First() on an empty sequence.
            int trainingRowCount = (csvFileParser.Rows * 2) / 3;
            int trainingPartEnd  = trainingRowCount > 0 ? trainingRowCount - 1 : 0;

            //TODO: when (if not removed) separating test and training, group by caseid
            logData.TrainingPartition.Start = 0;
            logData.TrainingPartition.End   = trainingPartEnd;
            logData.TestPartition.Start     = trainingPartEnd;
            logData.TestPartition.End       = csvFileParser.Rows;

            logData.Name = Path.GetFileName(path);

            return(logData);
        }
    /// <summary>
    /// Imports regression problem data from the table file at <paramref name="path"/>.
    /// </summary>
    /// <remarks>
    /// The last double variable is used as target. Every other double variable that is not
    /// constant over the first two thirds of the rows is an allowed input; if that range holds
    /// fewer than two rows, all other double variables are allowed. Files with fewer than two
    /// rows yield an empty training partition instead of failing.
    /// </remarks>
    /// <param name="path">Path of the file to import; its file name becomes the name of the problem data.</param>
    /// <returns>The regression problem data with training and test partitions set.</returns>
    public override IRegressionProblemData ImportData(string path) {
      TableFileParser csvFileParser = new TableFileParser();
      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
      string targetVar = dataset.DoubleVariables.Last();

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      int trainingRowCount = (csvFileParser.Rows * 2) / 3;
      var trainingIndizes = Enumerable.Range(0, trainingRowCount);
      if (trainingRowCount >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
            variableName != targetVar)
            allowedInputVars.Add(variableName);
        }
      } else {
        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
      }

      IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);

      // The boundary is the last training index (as before). With an empty range there is no
      // last index, so fall back to 0 rather than calling Last() on an empty sequence.
      // NOTE(review): if partition End is exclusive, the last checked row ends up in the test partition - confirm intended.
      int trainingPartEnd = trainingRowCount > 0 ? trainingRowCount - 1 : 0;
      regressionData.TrainingPartition.Start = 0;
      regressionData.TrainingPartition.End = trainingPartEnd;
      regressionData.TestPartition.Start = trainingPartEnd;
      regressionData.TestPartition.End = csvFileParser.Rows;

      regressionData.Name = Path.GetFileName(path);

      return regressionData;
    }
    /// <summary>
    /// Loads the regression instance described by <paramref name="id"/> from the zip archive
    /// that is embedded as a manifest resource in this type's assembly.
    /// </summary>
    /// <param name="id">Instance descriptor; it is cast to <c>ResourceRegressionDataDescriptor</c>.</param>
    /// <returns>The problem data produced by the descriptor's <c>GenerateRegressionData</c> for the parsed dataset.</returns>
    /// <exception cref="ArgumentException">
    /// The archive contains no entry with the descriptor's resource name, or the parsed
    /// variable names are rejected by the descriptor's <c>CheckVariableNames</c>.
    /// </exception>
    public override IRegressionProblemData LoadData(IDataDescriptor id) {
      var descriptor = (ResourceRegressionDataDescriptor)id;

      var instanceArchiveName = GetResourceName(FileName + @"\.zip");
      using (var instancesZipFile = new ZipArchive(GetType().Assembly.GetManifestResourceStream(instanceArchiveName), ZipArchiveMode.Read)) {
        var entry = instancesZipFile.GetEntry(descriptor.ResourceName);
        // GetEntry returns null for a missing entry; fail with a clear message instead of a NullReferenceException.
        if (entry == null) {
          throw new ArgumentException("The instance archive does not contain an entry named '" + descriptor.ResourceName + "'.");
        }

        NumberFormatInfo numberFormat;
        DateTimeFormatInfo dateFormat;
        char separator;
        // First pass over the entry: detect number format, date format and separator.
        using (Stream stream = entry.Open()) {
          TableFileParser.DetermineFileFormat(stream, out numberFormat, out dateFormat, out separator);
        }

        // Second pass on a freshly opened stream: parse the data, treating the first line as variable names.
        TableFileParser csvFileParser = new TableFileParser();
        using (Stream stream = entry.Open()) {
          csvFileParser.Parse(stream, numberFormat, dateFormat, separator, true);
        }

        Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
        if (!descriptor.CheckVariableNames(csvFileParser.VariableNames)) {
          throw new ArgumentException("Parsed file contains variables which are not in the descriptor.");
        }

        return descriptor.GenerateRegressionData(dataset);
      }
    }
    /// <summary>
    /// Imports time series prognosis problem data from the table file at <paramref name="path"/>.
    /// </summary>
    /// <remarks>
    /// The last variable of the file is used as target and all other double variables as
    /// allowed inputs. The first two thirds of the rows form the training partition, the
    /// remaining rows the test partition.
    /// </remarks>
    /// <param name="path">Path of the file to import; its file name becomes the name of the problem data.</param>
    /// <returns>The time series prognosis problem data with training and test partitions set.</returns>
    public override ITimeSeriesPrognosisProblemData ImportData(string path) {
      TableFileParser csvFileParser = new TableFileParser();
      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
      string targetVar = csvFileParser.VariableNames.Last();

      IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));

      ITimeSeriesPrognosisProblemData timeSeriesPrognosisData = new TimeSeriesPrognosisProblemData(dataset, allowedInputVars, targetVar);

      int trainingPartEnd = csvFileParser.Rows * 2 / 3;
      timeSeriesPrognosisData.TrainingPartition.Start = 0;
      timeSeriesPrognosisData.TrainingPartition.End = trainingPartEnd;
      timeSeriesPrognosisData.TestPartition.Start = trainingPartEnd;
      timeSeriesPrognosisData.TestPartition.End = csvFileParser.Rows;

      // Path.GetFileName handles every directory separator of the platform, not only '\\',
      // and matches how the other importers derive the name.
      timeSeriesPrognosisData.Name = Path.GetFileName(path);

      return timeSeriesPrognosisData;
    }
        /// <summary>
        /// Imports time series prognosis problem data from the table file at <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// The last variable of the file is used as target and all other double variables as
        /// allowed inputs. The first two thirds of the rows form the training partition, the
        /// remaining rows the test partition.
        /// </remarks>
        /// <param name="path">Path of the file to import; its file name becomes the name of the problem data.</param>
        /// <returns>The time series prognosis problem data with training and test partitions set.</returns>
        public override ITimeSeriesPrognosisProblemData ImportData(string path)
        {
            TableFileParser csvFileParser = new TableFileParser();

            csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

            Dataset dataset   = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
            string  targetVar = csvFileParser.VariableNames.Last();

            IEnumerable <string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));

            ITimeSeriesPrognosisProblemData timeSeriesPrognosisData = new TimeSeriesPrognosisProblemData(dataset, allowedInputVars, targetVar);

            int trainingPartEnd = csvFileParser.Rows * 2 / 3;

            timeSeriesPrognosisData.TrainingPartition.Start = 0;
            timeSeriesPrognosisData.TrainingPartition.End   = trainingPartEnd;
            timeSeriesPrognosisData.TestPartition.Start     = trainingPartEnd;
            timeSeriesPrognosisData.TestPartition.End       = csvFileParser.Rows;

            // Path.GetFileName handles every directory separator of the platform, not only '\\',
            // and matches how the other importers derive the name.
            timeSeriesPrognosisData.Name = Path.GetFileName(path);

            return(timeSeriesPrognosisData);
        }
        /// <summary>
        /// Parses the table file at <paramref name="path"/> with the supplied CSV format settings,
        /// reporting progress while parsing, and forwards the populated parser to the
        /// parser-based <c>ImportData</c> overload.
        /// </summary>
        /// <param name="path">Path of the file to parse.</param>
        /// <param name="type">Import options, passed through unchanged to the parser-based overload.</param>
        /// <param name="csvFormat">Number format, date/time format, separator and header flag used for parsing.</param>
        /// <returns>Whatever the parser-based <c>ImportData</c> overload returns.</returns>
        public TData ImportData(string path, ImportType type, DataAnalysisCSVFormat csvFormat)
        {
            var parser = new TableFileParser();

            // The file length is the denominator for the progress value passed to OnProgressChanged.
            double totalLength = new FileInfo(path).Length;
            parser.ProgressChanged += (sender, e) => OnProgressChanged(e / totalLength);

            parser.Parse(
                path,
                csvFormat.NumberFormatInfo,
                csvFormat.DateTimeFormatInfo,
                csvFormat.Separator,
                csvFormat.VariableNamesAvailable);

            return ImportData(path, type, parser);
        }
        /// <summary>
        /// Imports regression problem data from the table file at <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// The last double variable is used as target. Every other double variable that is not
        /// constant over the first two thirds of the rows is an allowed input; if that range holds
        /// fewer than two rows, all other double variables are allowed. Files with fewer than two
        /// rows yield an empty training partition instead of failing.
        /// </remarks>
        /// <param name="path">Path of the file to import; its file name becomes the name of the problem data.</param>
        /// <returns>The regression problem data with training and test partitions set.</returns>
        public override IRegressionProblemData ImportData(string path)
        {
            TableFileParser csvFileParser = new TableFileParser();

            csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

            Dataset dataset   = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
            string  targetVar = dataset.DoubleVariables.Last();

            // turn off input variables that are constant in the training partition
            var allowedInputVars = new List <string>();
            int trainingRowCount = (csvFileParser.Rows * 2) / 3;
            var trainingIndizes  = Enumerable.Range(0, trainingRowCount);

            if (trainingRowCount >= 2)
            {
                foreach (var variableName in dataset.DoubleVariables)
                {
                    if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
                        variableName != targetVar)
                    {
                        allowedInputVars.Add(variableName);
                    }
                }
            }
            else
            {
                allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
            }

            IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);

            // The boundary is the last training index (as before). With an empty range there is no
            // last index, so fall back to 0 rather than calling Last() on an empty sequence.
            // NOTE(review): if partition End is exclusive, the last checked row ends up in the test partition - confirm intended.
            int trainingPartEnd = trainingRowCount > 0 ? trainingRowCount - 1 : 0;

            regressionData.TrainingPartition.Start = 0;
            regressionData.TrainingPartition.End   = trainingPartEnd;
            regressionData.TestPartition.Start     = trainingPartEnd;
            regressionData.TestPartition.End       = csvFileParser.Rows;

            regressionData.Name = Path.GetFileName(path);

            return(regressionData);
        }
        /// <summary>
        /// Imports clustering problem data from the table file at <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// Double variables that are constant over the first two thirds of the rows are excluded
        /// from the allowed inputs; if that range holds fewer than two rows, all double variables
        /// are allowed. Files with fewer than two rows yield an empty training partition instead
        /// of failing.
        /// </remarks>
        /// <param name="path">Path of the file to import; its file name becomes the name of the problem data.</param>
        /// <returns>The clustering problem data with training and test partitions set.</returns>
        public override IClusteringProblemData ImportData(string path)
        {
            var csvFileParser = new TableFileParser();

            csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

            Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

            // turn off input variables that are constant in the training partition
            var allowedInputVars = new List <string>();
            int trainingRowCount = (csvFileParser.Rows * 2) / 3;
            var trainingIndizes  = Enumerable.Range(0, trainingRowCount);

            if (trainingRowCount >= 2)
            {
                foreach (var variableName in dataset.DoubleVariables)
                {
                    if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
                    {
                        allowedInputVars.Add(variableName);
                    }
                }
            }
            else
            {
                allowedInputVars.AddRange(dataset.DoubleVariables);
            }

            ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);

            // The boundary is the last training index (as before). With an empty range there is no
            // last index, so fall back to 0 rather than calling Last() on an empty sequence.
            // NOTE(review): if partition End is exclusive, the last checked row ends up in the test partition - confirm intended.
            int trainingPartEnd = trainingRowCount > 0 ? trainingRowCount - 1 : 0;

            clusteringData.TrainingPartition.Start = 0;
            clusteringData.TrainingPartition.End   = trainingPartEnd;
            clusteringData.TestPartition.Start     = trainingPartEnd;
            clusteringData.TestPartition.End       = csvFileParser.Rows;

            clusteringData.Name = Path.GetFileName(path);

            return(clusteringData);
        }