/// <summary>
/// Builds classification problem data from a parsed table file, placing the training
/// partition first and the test partition directly after it.
/// </summary>
/// <param name="path">Path of the imported file; only its file name is used, as the name of the result.</param>
/// <param name="type">Import settings read here: target variable, training percentage, and the two shuffle flags.</param>
/// <param name="csvFileParser">Parser supplying the variable names, the column values, and the row count.</param>
/// <returns>
/// Problem data with training partition [0, trainingPartEnd) and test partition
/// [trainingPartEnd, <c>csvFileParser.Rows</c>).
/// </returns>
protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser) {
  // Multiply in 64-bit: Rows * TrainingPercentage silently overflows int for very large tables.
  int trainingPartEnd = (int)((long)csvFileParser.Rows * type.TrainingPercentage / 100);

  List<IList> values = csvFileParser.Values;
  if (type.Shuffle) {
    values = Shuffle(values);
    if (type.UniformlyDistributeClasses) {
      // This overload reports its own training partition end through the out parameter,
      // replacing the percentage-based value computed above.
      // NOTE(review): FindIndex yields -1 when the target variable is not among the parsed names;
      // how Shuffle reacts to that is not visible here.
      int targetColumn = csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable));
      values = Shuffle(values, targetColumn, type.TrainingPercentage, out trainingPartEnd);
    }
  }

  Dataset dataset = new Dataset(csvFileParser.VariableNames, values);

  // turn off input variables that are constant in the training partition
  var allowedInputVars = new List<string>();
  var trainingIndices = Enumerable.Range(0, trainingPartEnd);
  if (trainingPartEnd >= 2) {
    foreach (var variableName in dataset.DoubleVariables) {
      // Cheap name check first so the target column is never scanned for its range.
      if (variableName != type.TargetVariable
          && dataset.GetDoubleValues(variableName, trainingIndices).Range() > 0) {
        allowedInputVars.Add(variableName);
      }
    }
  } else {
    // With fewer than two training rows the constancy check is skipped:
    // allow every double variable except the target.
    allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
  }

  ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable);
  classificationData.TrainingPartition.Start = 0;
  classificationData.TrainingPartition.End = trainingPartEnd;
  classificationData.TestPartition.Start = trainingPartEnd;
  classificationData.TestPartition.End = csvFileParser.Rows;
  classificationData.Name = Path.GetFileName(path);

  return classificationData;
}
/// <summary>
/// Converts the parsed table into classification problem data. Rows before the computed
/// split index form the training partition; the remaining rows form the test partition.
/// </summary>
/// <param name="path">Source file path; its file name becomes the name of the returned problem data.</param>
/// <param name="type">Import options: target variable, training percentage, shuffle and class-distribution flags.</param>
/// <param name="csvFileParser">Parser that provides variable names, values, and the number of rows.</param>
/// <returns>The problem data with training and test partitions assigned.</returns>
protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser) {
  // Widen to long before multiplying; the int product overflows once Rows * TrainingPercentage
  // exceeds int.MaxValue, which would corrupt the split index.
  long scaledRows = (long)csvFileParser.Rows * type.TrainingPercentage;
  int trainingPartEnd = (int)(scaledRows / 100);

  List<IList> values = csvFileParser.Values;
  if (type.Shuffle) {
    values = Shuffle(values);
    if (type.UniformlyDistributeClasses) {
      // The second Shuffle overload overwrites trainingPartEnd via its out parameter.
      values = Shuffle(values,
        csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable)),
        type.TrainingPercentage,
        out trainingPartEnd);
    }
  }

  Dataset dataset = new Dataset(csvFileParser.VariableNames, values);

  // turn off input variables that are constant in the training partition
  var allowedInputVars = new List<string>();
  var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
  if (trainingPartEnd >= 2) {
    // Keep non-target variables whose value range over the training rows is positive.
    allowedInputVars.AddRange(
      dataset.DoubleVariables.Where(name =>
        name != type.TargetVariable
        && dataset.GetDoubleValues(name, trainingIndizes).Range() > 0));
  } else {
    // Fewer than two training rows: no constancy check, keep everything except the target.
    allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(type.TargetVariable)));
  }

  ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable);
  classificationData.Name = Path.GetFileName(path);
  classificationData.TrainingPartition.Start = 0;
  classificationData.TrainingPartition.End = trainingPartEnd;
  classificationData.TestPartition.Start = trainingPartEnd;
  classificationData.TestPartition.End = csvFileParser.Rows;
  return classificationData;
}