/// <summary>
/// Imports a clustering problem from a CSV file. The first 2/3 of the rows
/// form the training partition, the remainder the test partition.
/// </summary>
/// <param name="path">Path of the CSV file to import.</param>
/// <returns>The imported clustering problem data, named after the file.</returns>
public override IClusteringProblemData ImportData(string path) {
  var csvFileParser = new TableFileParser();
  csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));
  Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

  // Turn off input variables that are constant in the training partition.
  var allowedInputVars = new List<string>();
  // End index of the training partition (exclusive), first 2/3 of the rows.
  int trainingPartEnd = (csvFileParser.Rows * 2) / 3;
  var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
  if (trainingIndizes.Count() >= 2) {
    foreach (var variableName in dataset.DoubleVariables) {
      if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
        allowedInputVars.Add(variableName);
    }
  } else {
    // Too few training rows to detect constant columns; keep all double variables.
    allowedInputVars.AddRange(dataset.DoubleVariables);
  }

  ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);

  // BUGFIX: previously the partitions were derived from trainingIndizes.Last()
  // and .First(), which dropped the last intended training row into the test
  // partition (End is exclusive) and threw InvalidOperationException for
  // datasets with fewer than two rows. Use the computed end index directly,
  // consistent with the other ImportData overloads in this file.
  clusteringData.TrainingPartition.Start = 0;
  clusteringData.TrainingPartition.End = trainingPartEnd;
  clusteringData.TestPartition.Start = trainingPartEnd;
  clusteringData.TestPartition.End = csvFileParser.Rows;

  clusteringData.Name = Path.GetFileName(path);

  return clusteringData;
}
/// <summary>
/// Loads the regression problem data described by <paramref name="id"/> from a
/// zipped CSV resource embedded in this assembly.
/// </summary>
/// <param name="id">Descriptor identifying the embedded resource; must be a
/// <c>ResourceRegressionDataDescriptor</c>.</param>
/// <returns>The regression problem data generated by the descriptor.</returns>
/// <exception cref="ArgumentException">Thrown when the parsed file contains
/// variables that are not declared in the descriptor.</exception>
public override IRegressionProblemData LoadData(IDataDescriptor id) {
  var descriptor = (ResourceRegressionDataDescriptor)id;
  // NOTE(review): the backslash in @"\.zip" looks like a regex-escaped dot —
  // GetResourceName presumably matches resource names via regex; verify before changing.
  var archiveName = GetResourceName(FileName + @"\.zip");
  using (var archive = new ZipArchive(GetType().Assembly.GetManifestResourceStream(archiveName), ZipArchiveMode.Read)) {
    var entry = archive.GetEntry(descriptor.ResourceName);

    // First pass over the entry: sniff number/date formats and the column separator.
    NumberFormatInfo numberFormat;
    DateTimeFormatInfo dateFormat;
    char separator;
    using (var formatStream = entry.Open()) {
      TableFileParser.DetermineFileFormat(formatStream, out numberFormat, out dateFormat, out separator);
    }

    // Second pass: parse the values using the detected format.
    var parser = new TableFileParser();
    using (var dataStream = entry.Open()) {
      parser.Parse(dataStream, numberFormat, dateFormat, separator, true);
    }

    var dataset = new Dataset(parser.VariableNames, parser.Values);
    if (!descriptor.CheckVariableNames(parser.VariableNames)) {
      throw new ArgumentException("Parsed file contains variables which are not in the descriptor.");
    }
    return descriptor.GenerateRegressionData(dataset);
  }
}
/// <summary>
/// Imports a regression problem from a CSV file. The last double column is
/// used as the target variable; the first 2/3 of the rows form the training
/// partition, the remainder the test partition.
/// </summary>
/// <param name="path">Path of the CSV file to import.</param>
/// <returns>The imported regression problem data, named after the file.</returns>
public override IRegressionProblemData ImportData(string path) {
  TableFileParser csvFileParser = new TableFileParser();
  csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));
  Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
  string targetVar = dataset.DoubleVariables.Last();

  // Turn off input variables that are constant in the training partition.
  var allowedInputVars = new List<string>();
  // End index of the training partition (exclusive), first 2/3 of the rows.
  int trainingPartEnd = (csvFileParser.Rows * 2) / 3;
  var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
  if (trainingIndizes.Count() >= 2) {
    foreach (var variableName in dataset.DoubleVariables) {
      if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && variableName != targetVar)
        allowedInputVars.Add(variableName);
    }
  } else {
    // Too few training rows to detect constant columns; keep everything except the target.
    allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
  }

  IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);

  // BUGFIX: previously the partitions were derived from trainingIndizes.Last()
  // and .First(), which dropped the last intended training row into the test
  // partition (End is exclusive) and threw InvalidOperationException for
  // datasets with fewer than two rows. Use the computed end index directly,
  // consistent with the other ImportData overloads in this file.
  regressionData.TrainingPartition.Start = 0;
  regressionData.TrainingPartition.End = trainingPartEnd;
  regressionData.TestPartition.Start = trainingPartEnd;
  regressionData.TestPartition.End = csvFileParser.Rows;

  regressionData.Name = Path.GetFileName(path);

  return regressionData;
}
/// <summary>
/// Imports a time-series prognosis problem from a CSV file. The last column is
/// the prognosis target; the first 2/3 of the rows form the training partition,
/// the remainder the test partition.
/// </summary>
/// <param name="path">Path of the CSV file to import.</param>
/// <returns>The imported time-series prognosis problem data, named after the file.</returns>
public override ITimeSeriesPrognosisProblemData ImportData(string path) {
  TableFileParser csvFileParser = new TableFileParser();
  csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));
  Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

  // Last column is the target; all other double variables are candidate inputs.
  string targetVar = csvFileParser.VariableNames.Last();
  IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));

  ITimeSeriesPrognosisProblemData timeSeriesPrognosisData = new TimeSeriesPrognosisProblemData(dataset, allowedInputVars, targetVar);

  int trainingPartEnd = csvFileParser.Rows * 2 / 3;
  timeSeriesPrognosisData.TrainingPartition.Start = 0;
  timeSeriesPrognosisData.TrainingPartition.End = trainingPartEnd;
  timeSeriesPrognosisData.TestPartition.Start = trainingPartEnd;
  timeSeriesPrognosisData.TestPartition.End = csvFileParser.Rows;

  // BUGFIX: replaced the hand-rolled LastIndexOf('\\')/Substring file-name
  // extraction with Path.GetFileName, which also handles '/' separators and
  // matches every other ImportData overload in this file.
  timeSeriesPrognosisData.Name = Path.GetFileName(path);

  return timeSeriesPrognosisData;
}
/// <summary>
/// Builds time-series prognosis problem data from an already-parsed CSV file,
/// splitting rows into training/test partitions by the configured percentage.
/// </summary>
/// <param name="path">Path of the imported file; only its file name is used for naming.</param>
/// <param name="type">Import options: target variable and training percentage.</param>
/// <param name="csvFileParser">Parser already holding the file's variables and values.</param>
/// <returns>The configured time-series prognosis problem data.</returns>
protected override ITimeSeriesPrognosisProblemData ImportData(string path, TimeSeriesPrognosisImportType type, TableFileParser csvFileParser) {
  var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

  // Turn off input variables that are constant in the training partition.
  var inputVariables = new List<string>();
  // Training partition covers the configured percentage of rows, but at least one row.
  int trainingEnd = (csvFileParser.Rows * type.TrainingPercentage) / 100;
  if (trainingEnd < 1) trainingEnd = 1;
  var trainingRows = Enumerable.Range(0, trainingEnd);
  if (trainingRows.Count() >= 2) {
    inputVariables.AddRange(
      dataset.DoubleVariables.Where(v => dataset.GetDoubleValues(v, trainingRows).Range() > 0 &&
                                         v != type.TargetVariable));
  } else {
    // Too few rows to detect constant columns; keep everything except the target.
    inputVariables.AddRange(dataset.DoubleVariables.Where(v => !v.Equals(type.TargetVariable)));
  }

  var problemData = new TimeSeriesPrognosisProblemData(dataset, inputVariables, type.TargetVariable);
  problemData.TrainingPartition.Start = 0;
  problemData.TrainingPartition.End = trainingEnd;
  problemData.TestPartition.Start = trainingEnd;
  problemData.TestPartition.End = csvFileParser.Rows;
  problemData.Name = Path.GetFileName(path);
  return problemData;
}
/// <summary>
/// Builds clustering problem data from an already-parsed CSV file, optionally
/// shuffling the rows and splitting them by the configured training percentage.
/// </summary>
/// <param name="path">Path of the imported file; only its file name is used for naming.</param>
/// <param name="type">Import options: shuffle flag and training percentage.</param>
/// <param name="csvFileParser">Parser already holding the file's variables and values.</param>
/// <returns>The configured clustering problem data.</returns>
protected override IClusteringProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser) {
  // Optionally shuffle the parsed rows before splitting into partitions.
  List<IList> values = type.Shuffle ? Shuffle(csvFileParser.Values) : csvFileParser.Values;
  var dataset = new Dataset(csvFileParser.VariableNames, values);

  // Turn off input variables that are constant in the training partition.
  var inputVariables = new List<string>();
  int trainingEnd = (csvFileParser.Rows * type.TrainingPercentage) / 100;
  var trainingRows = Enumerable.Range(0, trainingEnd);
  if (trainingRows.Count() >= 2) {
    inputVariables.AddRange(
      dataset.DoubleVariables.Where(v => dataset.GetDoubleValues(v, trainingRows).Range() > 0));
  } else {
    // Too few rows to detect constant columns; keep all double variables.
    inputVariables.AddRange(dataset.DoubleVariables);
  }

  var problemData = new ClusteringProblemData(dataset, inputVariables);
  problemData.TrainingPartition.Start = 0;
  problemData.TrainingPartition.End = trainingEnd;
  problemData.TestPartition.Start = trainingEnd;
  problemData.TestPartition.End = csvFileParser.Rows;
  problemData.Name = Path.GetFileName(path);
  return problemData;
}
/// <summary>
/// Typed import hook for providers that support configurable CSV import;
/// this base implementation does not support it and always throws.
/// </summary>
/// <param name="path">Path of the file to import (unused here).</param>
/// <param name="type">Import configuration (unused here).</param>
/// <param name="csvFileParser">Parser holding the parsed file (unused here).</param>
/// <exception cref="NotSupportedException">Always thrown; override to support typed import.</exception>
protected virtual TData ImportData(string path, ImportType type, TableFileParser csvFileParser) { throw new NotSupportedException(); }
/// <summary>
/// Creates time-series prognosis problem data from a parsed CSV file, using the
/// configured training percentage (at least one row) for the training partition.
/// </summary>
/// <param name="path">Path of the imported file; only its file name is used for naming.</param>
/// <param name="type">Import options: target variable and training percentage.</param>
/// <param name="csvFileParser">Parser already holding the file's variables and values.</param>
/// <returns>The configured time-series prognosis problem data.</returns>
protected override ITimeSeriesPrognosisProblemData ImportData(string path, TimeSeriesPrognosisImportType type, TableFileParser csvFileParser) {
  var parsedDataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

  // Training partition is the configured percentage of rows, clamped to >= 1.
  int splitIndex = (csvFileParser.Rows * type.TrainingPercentage) / 100;
  splitIndex = splitIndex > 0 ? splitIndex : 1;
  var trainingRange = Enumerable.Range(0, splitIndex);

  // Turn off input variables that are constant in the training partition.
  var activeInputs = new List<string>();
  if (trainingRange.Count() < 2) {
    // Not enough rows to judge constancy; admit every non-target double variable.
    activeInputs.AddRange(parsedDataset.DoubleVariables.Where(name => !name.Equals(type.TargetVariable)));
  } else {
    foreach (var name in parsedDataset.DoubleVariables) {
      bool varies = parsedDataset.GetDoubleValues(name, trainingRange).Range() > 0;
      if (varies && name != type.TargetVariable) activeInputs.Add(name);
    }
  }

  var result = new TimeSeriesPrognosisProblemData(parsedDataset, activeInputs, type.TargetVariable);
  result.TrainingPartition.Start = 0;
  result.TrainingPartition.End = splitIndex;
  result.TestPartition.Start = splitIndex;
  result.TestPartition.End = csvFileParser.Rows;
  result.Name = Path.GetFileName(path);
  return result;
}
/// <summary>
/// Creates clustering problem data from a parsed CSV file, optionally shuffling
/// rows, and splitting them by the configured training percentage.
/// </summary>
/// <param name="path">Path of the imported file; only its file name is used for naming.</param>
/// <param name="type">Import options: shuffle flag and training percentage.</param>
/// <param name="csvFileParser">Parser already holding the file's variables and values.</param>
/// <returns>The configured clustering problem data.</returns>
protected override IClusteringProblemData ImportData(string path, DataAnalysisImportType type, TableFileParser csvFileParser) {
  List<IList> rowValues = csvFileParser.Values;
  if (type.Shuffle) rowValues = Shuffle(rowValues);
  var parsedDataset = new Dataset(csvFileParser.VariableNames, rowValues);

  int splitIndex = (csvFileParser.Rows * type.TrainingPercentage) / 100;
  var trainingRange = Enumerable.Range(0, splitIndex);

  // Turn off input variables that are constant in the training partition.
  var activeInputs = new List<string>();
  if (trainingRange.Count() < 2) {
    // Not enough rows to judge constancy; admit every double variable.
    activeInputs.AddRange(parsedDataset.DoubleVariables);
  } else {
    foreach (var name in parsedDataset.DoubleVariables) {
      if (parsedDataset.GetDoubleValues(name, trainingRange).Range() > 0) activeInputs.Add(name);
    }
  }

  var result = new ClusteringProblemData(parsedDataset, activeInputs);
  result.TrainingPartition.Start = 0;
  result.TrainingPartition.End = splitIndex;
  result.TestPartition.Start = splitIndex;
  result.TestPartition.End = csvFileParser.Rows;
  result.Name = Path.GetFileName(path);
  return result;
}
/// <summary>
/// Builds classification problem data from an already-parsed CSV file. Rows may
/// be shuffled, optionally redistributing class labels uniformly across the
/// training/test split (which can move the split point).
/// </summary>
/// <param name="path">Path of the imported file; only its file name is used for naming.</param>
/// <param name="type">Import options: target variable, shuffle flags, training percentage.</param>
/// <param name="csvFileParser">Parser already holding the file's variables and values.</param>
/// <returns>The configured classification problem data.</returns>
protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser) {
  int trainingEnd = (csvFileParser.Rows * type.TrainingPercentage) / 100;
  List<IList> values = csvFileParser.Values;
  if (type.Shuffle) {
    values = Shuffle(values);
    if (type.UniformlyDistributeClasses) {
      // Re-shuffle so class labels are evenly distributed over the partitions;
      // this overload recomputes the training/test split point.
      int targetColumn = csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable));
      values = Shuffle(values, targetColumn, type.TrainingPercentage, out trainingEnd);
    }
  }
  var dataset = new Dataset(csvFileParser.VariableNames, values);

  // Turn off input variables that are constant in the training partition.
  var inputVariables = new List<string>();
  var trainingRows = Enumerable.Range(0, trainingEnd);
  if (trainingRows.Count() >= 2) {
    inputVariables.AddRange(
      dataset.DoubleVariables.Where(v => dataset.GetDoubleValues(v, trainingRows).Range() > 0 &&
                                         v != type.TargetVariable));
  } else {
    // Too few rows to detect constant columns; keep everything except the target.
    inputVariables.AddRange(dataset.DoubleVariables.Where(v => !v.Equals(type.TargetVariable)));
  }

  var problemData = new ClassificationProblemData(dataset, inputVariables, type.TargetVariable);
  problemData.TrainingPartition.Start = 0;
  problemData.TrainingPartition.End = trainingEnd;
  problemData.TestPartition.Start = trainingEnd;
  problemData.TestPartition.End = csvFileParser.Rows;
  problemData.Name = Path.GetFileName(path);
  return problemData;
}
/// <summary>
/// Creates classification problem data from a parsed CSV file. Supports row
/// shuffling and, optionally, a class-balancing reshuffle that recomputes the
/// training/test split point.
/// </summary>
/// <param name="path">Path of the imported file; only its file name is used for naming.</param>
/// <param name="type">Import options: target variable, shuffle flags, training percentage.</param>
/// <param name="csvFileParser">Parser already holding the file's variables and values.</param>
/// <returns>The configured classification problem data.</returns>
protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser) {
  int splitIndex = (csvFileParser.Rows * type.TrainingPercentage) / 100;
  List<IList> rowValues = csvFileParser.Values;
  if (type.Shuffle) {
    rowValues = Shuffle(rowValues);
    if (type.UniformlyDistributeClasses) {
      // Balance class labels across partitions; this overload outputs a new split index.
      var columnNames = csvFileParser.VariableNames.ToList();
      rowValues = Shuffle(rowValues, columnNames.FindIndex(x => x.Equals(type.TargetVariable)), type.TrainingPercentage, out splitIndex);
    }
  }
  var parsedDataset = new Dataset(csvFileParser.VariableNames, rowValues);

  // Turn off input variables that are constant in the training partition.
  var activeInputs = new List<string>();
  var trainingRange = Enumerable.Range(0, splitIndex);
  if (trainingRange.Count() < 2) {
    // Not enough rows to judge constancy; admit every non-target double variable.
    activeInputs.AddRange(parsedDataset.DoubleVariables.Where(name => !name.Equals(type.TargetVariable)));
  } else {
    foreach (var name in parsedDataset.DoubleVariables) {
      bool varies = parsedDataset.GetDoubleValues(name, trainingRange).Range() > 0;
      if (varies && name != type.TargetVariable) activeInputs.Add(name);
    }
  }

  var result = new ClassificationProblemData(parsedDataset, activeInputs, type.TargetVariable);
  result.TrainingPartition.Start = 0;
  result.TrainingPartition.End = splitIndex;
  result.TestPartition.Start = splitIndex;
  result.TestPartition.End = csvFileParser.Rows;
  result.Name = Path.GetFileName(path);
  return result;
}