/// <summary>
/// Parses the table file at <paramref name="path"/> and wraps it in clustering problem data,
/// using the first two thirds of the rows as the training partition and the remaining rows
/// as the test partition.
/// </summary>
/// <param name="path">Path of the file to parse; its file name becomes the problem data name.</param>
/// <returns>
/// Clustering problem data whose allowed input variables are the double variables that are
/// not constant within the training rows.
/// </returns>
public override IClusteringProblemData ImportData(string path) {
    var csvFileParser = new TableFileParser();
    csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

    Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

    // Number of training rows, which is also the exclusive end of the training partition.
    // Deriving the boundary from the count (instead of trainingIndizes.Last()) keeps the last
    // training row inside the training partition and avoids First()/Last() throwing on an
    // empty range when the file has fewer than two rows.
    int trainingPartEnd = (csvFileParser.Rows * 2) / 3;
    var trainingIndizes = Enumerable.Range(0, trainingPartEnd);

    // turn off input variables that are constant in the training partition
    var allowedInputVars = new List<string>();
    if (trainingPartEnd >= 2) {
        foreach (var variableName in dataset.DoubleVariables) {
            if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0) {
                allowedInputVars.Add(variableName);
            }
        }
    } else {
        // With fewer than two training rows every variable looks constant, so keep them all.
        allowedInputVars.AddRange(dataset.DoubleVariables);
    }

    ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);

    clusteringData.TrainingPartition.Start = 0;
    clusteringData.TrainingPartition.End = trainingPartEnd;
    clusteringData.TestPartition.Start = trainingPartEnd;
    clusteringData.TestPartition.End = csvFileParser.Rows;

    clusteringData.Name = Path.GetFileName(path);

    return clusteringData;
}
/// <summary>
/// Builds clustering problem data from an already parsed table file, optionally shuffling
/// the values and splitting the rows into training and test partitions according to
/// <c>type.TrainingPercentage</c>.
/// </summary>
/// <param name="path">Path of the imported file; only its file name is used, as the problem data name.</param>
/// <param name="type">Import settings supplying the shuffle flag and the training percentage.</param>
/// <param name="csvFileParser">Parser that provides the variable names, values and row count.</param>
/// <returns>
/// Clustering problem data whose allowed input variables are the double variables that are
/// not constant within the training rows.
/// </returns>
protected override IClusteringProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser) {
    List<IList> parsedValues = csvFileParser.Values;
    List<IList> datasetValues = type.Shuffle ? Shuffle(parsedValues) : parsedValues;
    var data = new Dataset(csvFileParser.VariableNames, datasetValues);

    // The training rows are the leading block of the file: indices [0, splitIndex).
    int splitIndex = (csvFileParser.Rows * type.TrainingPercentage) / 100;
    var trainingRows = Enumerable.Range(0, splitIndex);

    // Turn off input variables that are constant in the training partition.
    var inputCandidates = new List<string>();
    if (splitIndex < 2) {
        // Too few training rows to judge whether a variable is constant; keep every double variable.
        inputCandidates.AddRange(data.DoubleVariables);
    } else {
        inputCandidates.AddRange(
            data.DoubleVariables.Where(name => data.GetDoubleValues(name, trainingRows).Range() > 0));
    }

    var problemData = new ClusteringProblemData(data, inputCandidates);

    problemData.TrainingPartition.Start = 0;
    problemData.TrainingPartition.End = splitIndex;
    problemData.TestPartition.Start = splitIndex;
    problemData.TestPartition.End = csvFileParser.Rows;

    problemData.Name = Path.GetFileName(path);
    return problemData;
}
/// <summary>
/// Creates a clustering problem whose <c>ProblemData</c> is initialised with a
/// default-constructed <c>ClusteringProblemData</c>.
/// </summary>
public ClusteringProblem() {
    // The parameterless base constructor runs implicitly before this body.
    ProblemData = new ClusteringProblemData();
}
/// <summary>
/// Cloning constructor: forwards <paramref name="original"/> and <paramref name="cloner"/>
/// to the base class and performs no additional work of its own.
/// </summary>
/// <param name="original">The instance being cloned.</param>
/// <param name="cloner">The cloner passed through to the base constructor.</param>
private ClusteringProblemData(ClusteringProblemData original, Cloner cloner)
    : base(original, cloner) {
}
/// <summary>
/// Creates new clustering problem data from <c>ExportedDataset</c>, the double input
/// variables obtained via <c>GetDoubleInputVariables(String.Empty)</c>, and
/// <c>Transformations</c>.
/// </summary>
/// <param name="oldProblemData">Previous problem data; not read by this method.</param>
/// <returns>The newly constructed <c>ClusteringProblemData</c>.</returns>
private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
    // Evaluated in the same left-to-right order as the constructor arguments.
    var exportedData = ExportedDataset;
    var doubleInputs = GetDoubleInputVariables(String.Empty);
    var appliedTransformations = Transformations;

    return new ClusteringProblemData(exportedData, doubleInputs, appliedTransformations);
}