/// <summary>
    /// Parses the table file at <paramref name="path"/> and wraps its contents in a
    /// clustering problem data object.
    /// </summary>
    /// <param name="path">Path of the table file to import.</param>
    /// <returns>
    /// Clustering problem data named after the file, with the training/test boundary
    /// derived from the first two thirds of the rows (integer division).
    /// </returns>
    public override IClusteringProblemData ImportData(string path) {
      var csvFileParser = new TableFileParser();
      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

      // Number of leading rows that make up the training index range.
      int trainingRowCount = (csvFileParser.Rows * 2) / 3;
      var trainingIndizes = Enumerable.Range(0, trainingRowCount);

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      if (trainingRowCount >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
            allowedInputVars.Add(variableName);
        }
      } else {
        // Too few training rows to judge constancy: allow every double variable.
        allowedInputVars.AddRange(dataset.DoubleVariables);
      }

      ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);

      // The boundary is the last training index (count - 1). For files with fewer than
      // two rows the training range is empty; First()/Last() on it would throw, so the
      // boundary falls back to 0 instead.
      int trainingPartEnd = trainingRowCount > 0 ? trainingRowCount - 1 : 0;
      clusteringData.TrainingPartition.Start = 0;
      clusteringData.TrainingPartition.End = trainingPartEnd;
      clusteringData.TestPartition.Start = trainingPartEnd;
      clusteringData.TestPartition.End = csvFileParser.Rows;

      clusteringData.Name = Path.GetFileName(path);

      return clusteringData;
    }
        /// <summary>
        /// Parses the table file at <paramref name="path"/> and wraps its contents in a
        /// log modelling problem data object.
        /// </summary>
        /// <param name="path">Path of the table file to import.</param>
        /// <returns>
        /// Log modelling problem data named after the file, with the training/test boundary
        /// derived from the first two thirds of the rows (integer division).
        /// </returns>
        public override ILogModellingProblemData ImportData(string path)
        {
            var csvFileParser = new TableFileParser();

            csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

            var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

            // NOTE(review): all three roles are initialised with the first column; this looks
            // like a placeholder default that is meant to be adjusted afterwards - confirm.
            string caseIDVar    = dataset.VariableNames.First();
            string timestampVar = dataset.VariableNames.First();
            string activityVar  = dataset.VariableNames.First();

            ILogModellingProblemData logData = new LogModellingProblemData(dataset, caseIDVar, timestampVar, activityVar);

            // Number of leading rows that make up the training index range.
            int trainingRowCount = (csvFileParser.Rows * 2) / 3;

            // The boundary is the last training index (count - 1). For files with fewer than
            // two rows the training range is empty; calling First()/Last() on it would throw,
            // so the boundary falls back to 0 instead.
            int trainingPartEnd = trainingRowCount > 0 ? trainingRowCount - 1 : 0;

            //TODO: when (if not removed) separating test and training, group by caseid
            logData.TrainingPartition.Start = 0;
            logData.TrainingPartition.End   = trainingPartEnd;
            logData.TestPartition.Start     = trainingPartEnd;
            logData.TestPartition.End       = csvFileParser.Rows;

            logData.Name = Path.GetFileName(path);

            return logData;
        }
    /// <summary>
    /// Parses the table file at <paramref name="path"/> and wraps its contents in a
    /// regression problem data object. The last double variable of the dataset is used
    /// as the target; the remaining double variables are candidate inputs.
    /// </summary>
    /// <param name="path">Path of the table file to import.</param>
    /// <returns>
    /// Regression problem data named after the file, with the training/test boundary
    /// derived from the first two thirds of the rows (integer division).
    /// </returns>
    public override IRegressionProblemData ImportData(string path) {
      TableFileParser csvFileParser = new TableFileParser();
      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
      string targetVar = dataset.DoubleVariables.Last();

      // Number of leading rows that make up the training index range.
      int trainingRowCount = (csvFileParser.Rows * 2) / 3;
      var trainingIndizes = Enumerable.Range(0, trainingRowCount);

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      if (trainingRowCount >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
            variableName != targetVar)
            allowedInputVars.Add(variableName);
        }
      } else {
        // Too few training rows to judge constancy: allow every double variable but the target.
        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
      }

      IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);

      // The boundary is the last training index (count - 1). For files with fewer than
      // two rows the training range is empty; First()/Last() on it would throw, so the
      // boundary falls back to 0 instead.
      int trainingPartEnd = trainingRowCount > 0 ? trainingRowCount - 1 : 0;
      regressionData.TrainingPartition.Start = 0;
      regressionData.TrainingPartition.End = trainingPartEnd;
      regressionData.TestPartition.Start = trainingPartEnd;
      regressionData.TestPartition.End = csvFileParser.Rows;

      regressionData.Name = Path.GetFileName(path);

      return regressionData;
    }
    /// <summary>
    /// Parses the table file at <paramref name="path"/> and wraps its contents in a
    /// time series prognosis problem data object. The last column name reported by the
    /// parser is used as the target; all other double variables are allowed inputs.
    /// </summary>
    /// <param name="path">Path of the table file to import.</param>
    /// <returns>
    /// Time series prognosis problem data named after the file, with the first two thirds
    /// of the rows (integer division) as training range and the remainder as test range.
    /// </returns>
    public override ITimeSeriesPrognosisProblemData ImportData(string path) {
      TableFileParser csvFileParser = new TableFileParser();
      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
      string targetVar = csvFileParser.VariableNames.Last();

      IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));

      ITimeSeriesPrognosisProblemData timeSeriesPrognosisData = new TimeSeriesPrognosisProblemData(dataset, allowedInputVars, targetVar);

      int trainingPartEnd = csvFileParser.Rows * 2 / 3;
      timeSeriesPrognosisData.TrainingPartition.Start = 0;
      timeSeriesPrognosisData.TrainingPartition.End = trainingPartEnd;
      timeSeriesPrognosisData.TestPartition.Start = trainingPartEnd;
      timeSeriesPrognosisData.TestPartition.End = csvFileParser.Rows;

      // Use the framework helper instead of searching for '\\' by hand so that every
      // directory separator the platform recognises (including '/') is handled.
      timeSeriesPrognosisData.Name = System.IO.Path.GetFileName(path);

      return timeSeriesPrognosisData;
    }
        /// <summary>
        /// Parses the table file at <paramref name="path"/> and wraps its contents in a
        /// time series prognosis problem data object. The last column name reported by the
        /// parser is used as the target; all other double variables are allowed inputs.
        /// </summary>
        /// <param name="path">Path of the table file to import.</param>
        /// <returns>
        /// Time series prognosis problem data named after the file, with the first two thirds
        /// of the rows (integer division) as training range and the remainder as test range.
        /// </returns>
        public override ITimeSeriesPrognosisProblemData ImportData(string path)
        {
            TableFileParser csvFileParser = new TableFileParser();

            csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

            Dataset dataset   = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
            string  targetVar = csvFileParser.VariableNames.Last();

            IEnumerable <string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));

            ITimeSeriesPrognosisProblemData timeSeriesPrognosisData = new TimeSeriesPrognosisProblemData(dataset, allowedInputVars, targetVar);

            int trainingPartEnd = csvFileParser.Rows * 2 / 3;

            timeSeriesPrognosisData.TrainingPartition.Start = 0;
            timeSeriesPrognosisData.TrainingPartition.End   = trainingPartEnd;
            timeSeriesPrognosisData.TestPartition.Start     = trainingPartEnd;
            timeSeriesPrognosisData.TestPartition.End       = csvFileParser.Rows;

            // Use the framework helper instead of searching for '\\' by hand so that every
            // directory separator the platform recognises (including '/') is handled.
            timeSeriesPrognosisData.Name = System.IO.Path.GetFileName(path);

            return timeSeriesPrognosisData;
        }
        /// <summary>
        /// Parses the table file at <paramref name="path"/> and wraps its contents in a
        /// regression problem data object. The last double variable of the dataset is used
        /// as the target; the remaining double variables are candidate inputs.
        /// </summary>
        /// <param name="path">Path of the table file to import.</param>
        /// <returns>
        /// Regression problem data named after the file, with the training/test boundary
        /// derived from the first two thirds of the rows (integer division).
        /// </returns>
        public override IRegressionProblemData ImportData(string path)
        {
            TableFileParser csvFileParser = new TableFileParser();

            csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

            Dataset dataset   = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
            string  targetVar = dataset.DoubleVariables.Last();

            // Number of leading rows that make up the training index range.
            int trainingRowCount = (csvFileParser.Rows * 2) / 3;
            var trainingIndizes  = Enumerable.Range(0, trainingRowCount);

            // turn off input variables that are constant in the training partition
            var allowedInputVars = new List <string>();

            if (trainingRowCount >= 2)
            {
                foreach (var variableName in dataset.DoubleVariables)
                {
                    if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
                        variableName != targetVar)
                    {
                        allowedInputVars.Add(variableName);
                    }
                }
            }
            else
            {
                // Too few training rows to judge constancy: allow every double variable but the target.
                allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
            }

            IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);

            // The boundary is the last training index (count - 1). For files with fewer than
            // two rows the training range is empty; First()/Last() on it would throw, so the
            // boundary falls back to 0 instead.
            int trainingPartEnd = trainingRowCount > 0 ? trainingRowCount - 1 : 0;

            regressionData.TrainingPartition.Start = 0;
            regressionData.TrainingPartition.End   = trainingPartEnd;
            regressionData.TestPartition.Start     = trainingPartEnd;
            regressionData.TestPartition.End       = csvFileParser.Rows;

            regressionData.Name = Path.GetFileName(path);

            return regressionData;
        }
        /// <summary>
        /// Parses the table file at <paramref name="path"/> and wraps its contents in a
        /// clustering problem data object.
        /// </summary>
        /// <param name="path">Path of the table file to import.</param>
        /// <returns>
        /// Clustering problem data named after the file, with the training/test boundary
        /// derived from the first two thirds of the rows (integer division).
        /// </returns>
        public override IClusteringProblemData ImportData(string path)
        {
            var csvFileParser = new TableFileParser();

            csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

            Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

            // Number of leading rows that make up the training index range.
            int trainingRowCount = (csvFileParser.Rows * 2) / 3;
            var trainingIndizes  = Enumerable.Range(0, trainingRowCount);

            // turn off input variables that are constant in the training partition
            var allowedInputVars = new List <string>();

            if (trainingRowCount >= 2)
            {
                foreach (var variableName in dataset.DoubleVariables)
                {
                    if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
                    {
                        allowedInputVars.Add(variableName);
                    }
                }
            }
            else
            {
                // Too few training rows to judge constancy: allow every double variable.
                allowedInputVars.AddRange(dataset.DoubleVariables);
            }

            ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);

            // The boundary is the last training index (count - 1). For files with fewer than
            // two rows the training range is empty; First()/Last() on it would throw, so the
            // boundary falls back to 0 instead.
            int trainingPartEnd = trainingRowCount > 0 ? trainingRowCount - 1 : 0;

            clusteringData.TrainingPartition.Start = 0;
            clusteringData.TrainingPartition.End   = trainingPartEnd;
            clusteringData.TestPartition.Start     = trainingPartEnd;
            clusteringData.TestPartition.End       = csvFileParser.Rows;

            clusteringData.Name = Path.GetFileName(path);

            return clusteringData;
        }