/// <summary>
/// Parses the table file at <paramref name="path"/> and wraps its contents in a
/// time-series prognosis problem.
/// </summary>
/// <remarks>
/// The last column reported by the parser becomes the target variable; every other
/// double-valued column of the dataset is offered as an input variable. The first
/// two thirds of the rows (integer division) form the training partition and the
/// remaining rows form the test partition.
/// </remarks>
/// <param name="path">Path of the file to import; its file name becomes the problem name.</param>
/// <returns>The problem data built from the parsed file.</returns>
public override ITimeSeriesPrognosisProblemData ImportData(string path) {
  TableFileParser csvFileParser = new TableFileParser();
  csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));

  Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
  string targetVar = csvFileParser.VariableNames.Last();
  IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));

  ITimeSeriesPrognosisProblemData timeSeriesPrognosisData = new TimeSeriesPrognosisProblemData(dataset, allowedInputVars, targetVar);

  int trainingPartEnd = csvFileParser.Rows * 2 / 3;
  timeSeriesPrognosisData.TrainingPartition.Start = 0;
  timeSeriesPrognosisData.TrainingPartition.End = trainingPartEnd;
  timeSeriesPrognosisData.TestPartition.Start = trainingPartEnd;
  timeSeriesPrognosisData.TestPartition.End = csvFileParser.Rows;

  // Use the framework's path handling instead of searching for '\\' by hand, so that
  // paths written with '/' (or on non-Windows platforms) also yield just the file name.
  // Fully qualified to avoid depending on a `using System.IO;` directive.
  timeSeriesPrognosisData.Name = System.IO.Path.GetFileName(path);

  return timeSeriesPrognosisData;
}
/// <summary>
/// Builds new time-series prognosis problem data from the exported dataset, carrying
/// over the training and test horizons of <paramref name="oldProblemData"/>.
/// </summary>
/// <param name="oldProblemData">Existing problem data whose target variable and horizons are reused.</param>
/// <returns>The newly created problem data.</returns>
private IDataAnalysisProblemData CreateTimeSeriesPrognosisData(TimeSeriesPrognosisProblemData oldProblemData) {
  var previousTarget = oldProblemData.TargetVariable;

  // Keep the previous target if the current data still contains it; otherwise fall
  // back to the first variable of the current data.
  var target = context.Data.VariableNames.Contains(previousTarget)
    ? previousTarget
    : context.Data.VariableNames.First();

  var inputs = GetDoubleInputVariables(target);

  var result = new TimeSeriesPrognosisProblemData(ExportedDataset, inputs, target, Transformations);
  result.TrainingHorizon = oldProblemData.TrainingHorizon;
  result.TestHorizon = oldProblemData.TestHorizon;
  return result;
}
/// <summary>
/// Creates an instance from an existing one by forwarding <paramref name="original"/>
/// and <paramref name="cloner"/> to the base-class constructor. This constructor
/// itself copies no additional state.
/// </summary>
/// <param name="original">The instance to create this one from.</param>
/// <param name="cloner">The cloner passed through to the base constructor.</param>
protected TimeSeriesPrognosisProblemData(TimeSeriesPrognosisProblemData original, Cloner cloner) : base(original, cloner) { }
/// <summary>
/// Builds time-series prognosis problem data from an already parsed table file.
/// </summary>
/// <remarks>
/// The training partition covers the first <c>TrainingPercentage</c> percent of the rows
/// (integer arithmetic, but never fewer than one row); the test partition covers the rest.
/// When the training partition has at least two rows, double variables that are constant
/// within it are turned off as inputs. The target variable is never offered as an input.
/// </remarks>
/// <param name="path">Path of the imported file; its file name becomes the problem name.</param>
/// <param name="type">Import settings providing the target variable and the training percentage.</param>
/// <param name="csvFileParser">Parser that already holds the variable names, values and row count.</param>
/// <returns>The configured problem data.</returns>
protected override ITimeSeriesPrognosisProblemData ImportData(string path, TimeSeriesPrognosisImportType type, TableFileParser csvFileParser) {
  Dataset data = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);

  int splitRow = (csvFileParser.Rows * type.TrainingPercentage) / 100;
  if (splitRow <= 0) {
    splitRow = 1;
  }
  var trainingRows = Enumerable.Range(0, splitRow);

  // Turn off input variables that are constant in the training partition.
  // With fewer than two training rows every variable would look constant,
  // so in that case only the target variable is excluded.
  List<string> inputCandidates;
  if (trainingRows.Count() >= 2) {
    inputCandidates = data.DoubleVariables
      .Where(name => data.GetDoubleValues(name, trainingRows).Range() > 0 && name != type.TargetVariable)
      .ToList();
  } else {
    inputCandidates = data.DoubleVariables
      .Where(name => !name.Equals(type.TargetVariable))
      .ToList();
  }

  TimeSeriesPrognosisProblemData problemData = new TimeSeriesPrognosisProblemData(data, inputCandidates, type.TargetVariable);
  problemData.TrainingPartition.Start = 0;
  problemData.TrainingPartition.End = splitRow;
  problemData.TestPartition.Start = splitRow;
  problemData.TestPartition.End = csvFileParser.Rows;
  problemData.Name = Path.GetFileName(path);

  return problemData;
}