/// <summary>
/// Copies all classification-specific properties (target variable, class names,
/// positive class and the misclassification penalty matrix) from the provided
/// problem data into this instance.
/// </summary>
/// <param name="problemData">Source problem data; must be a <see cref="ClassificationProblemData"/>.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="problemData"/> is not classification problem data.</exception>
public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {
  if (problemData == null) {
    // use nameof instead of a string literal so renames stay refactor-safe
    throw new ArgumentNullException(nameof(problemData), "The provided problemData is null.");
  }
  ClassificationProblemData classificationProblemData = problemData as ClassificationProblemData;
  if (classificationProblemData == null) {
    throw new ArgumentException(
      "The problem data is not a classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.",
      nameof(problemData));
  }

  base.AdjustProblemDataProperties(problemData);
  TargetVariable = classificationProblemData.TargetVariable;

  // materialize once: repeated Count()/ElementAt(i) on an IEnumerable is O(n^2)
  var classNames = classificationProblemData.ClassNames.ToList();
  for (int i = 0; i < classNames.Count; i++) {
    ClassNamesParameter.Value[i, 0] = classNames[i];
  }
  PositiveClass = classificationProblemData.PositiveClass;

  // copy the full penalty matrix, mapped through the class value cache
  for (int i = 0; i < Classes; i++) {
    for (int j = 0; j < Classes; j++) {
      ClassificationPenaltiesParameter.Value[i, j] =
        classificationProblemData.GetClassificationPenalty(ClassValuesCache[i], ClassValuesCache[j]);
    }
  }
}
/// <summary>
/// Static initializer: builds the default (Wisconsin) dataset and the shared
/// placeholder instance used before real problem data is loaded.
/// </summary>
static ClassificationProblemData() {
  defaultDataset = new Dataset(defaultVariableNames, defaultData) {
    Name = "Wisconsin classification problem",
    Description = "subset from to .."
  };
  defaultAllowedInputVariables = defaultVariableNames.Except(new List<string>() { "sample", "class" });
  defaultTargetVariable = "class";

  // placeholder instance: stripped of all real parameters, marked empty,
  // and rebuilt with zero-sized parameter values
  var placeholder = new ClassificationProblemData();
  placeholder.Parameters.Clear();
  placeholder.Name = "Empty Classification ProblemData";
  placeholder.Description = "This ProblemData acts as place holder before the correct problem data is loaded.";
  placeholder.isEmpty = true;
  placeholder.Parameters.Add(new FixedValueParameter<Dataset>(DatasetParameterName, "", new Dataset()));
  placeholder.Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>(InputVariablesParameterName, ""));
  placeholder.Parameters.Add(new FixedValueParameter<IntRange>(TrainingPartitionParameterName, "", (IntRange)new IntRange(0, 0).AsReadOnly()));
  placeholder.Parameters.Add(new FixedValueParameter<IntRange>(TestPartitionParameterName, "", (IntRange)new IntRange(0, 0).AsReadOnly()));
  placeholder.Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>()));
  placeholder.Parameters.Add(new FixedValueParameter<StringMatrix>(ClassNamesParameterName, "", new StringMatrix(0, 0).AsReadOnly()));
  placeholder.Parameters.Add(new FixedValueParameter<DoubleMatrix>(ClassificationPenaltiesParameterName, "", (DoubleMatrix)new DoubleMatrix(0, 0).AsReadOnly()));
  emptyProblemData = placeholder;
}
/// <summary>
/// Evaluates model quality after temporarily replacing one variable's values,
/// then restores the original values before returning.
/// </summary>
/// <returns>The quality computed over the given rows with the replacement values in place.</returns>
private static double CalculateQualityForReplacement(
  IClassificationModel model, ModifiableDataset modifiableDataset, string variableName,
  IList originalValues, IEnumerable<int> rows, IList replacementValues, IEnumerable<double> targetValues) {
  // swap in the replacement values for the duration of the evaluation
  modifiableDataset.ReplaceVariable(variableName, replacementValues);

  // discriminant models are refit against the modified data before estimating
  var discriminantModel = model as IDiscriminantFunctionClassificationModel;
  if (discriminantModel != null) {
    var refitProblemData = new ClassificationProblemData(modifiableDataset, modifiableDataset.VariableNames, model.TargetVariable);
    discriminantModel.RecalculateModelParameters(refitProblemData, rows);
  }

  // mkommend: ToList is used on purpose to avoid lazy evaluation that could
  // result in wrong estimates due to variable replacements
  var predictions = model.GetEstimatedClassValues(modifiableDataset, rows).ToList();
  double quality = CalculateQuality(targetValues, predictions);

  // undo the replacement so the dataset is unchanged for the caller
  modifiableDataset.ReplaceVariable(variableName, originalValues);
  return quality;
}
/// <summary>
/// Imports a CSV file as classification problem data. The last double variable
/// is used as the target; roughly the first 2/3 of the rows become the training
/// partition, the rest the test partition.
/// </summary>
/// <param name="path">Path of the CSV file to import.</param>
/// <returns>The imported classification problem data.</returns>
public override IClassificationProblemData ImportData(string path) {
  TableFileParser csvFileParser = new TableFileParser();
  csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));
  Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
  string targetVar = dataset.DoubleVariables.Last();

  // exclusive end index of the training partition (first 2/3 of the rows)
  int trainingPartEnd = (csvFileParser.Rows * 2) / 3;
  var trainingIndizes = Enumerable.Range(0, trainingPartEnd);

  // turn off input variables that are constant in the training partition
  var allowedInputVars = new List<string>();
  if (trainingPartEnd >= 2) {
    foreach (var variableName in dataset.DoubleVariables) {
      if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 && variableName != targetVar)
        allowedInputVars.Add(variableName);
    }
  } else {
    allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => !x.Equals(targetVar)));
  }

  ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
  // BUGFIX: use the exclusive row count as the partition boundary. Previously
  // TrainingPartition.End = trainingIndizes.Last() (last INDEX, i.e. count-1),
  // which dropped the final training row from [Start, End) and was inconsistent
  // with the ImportData(path, type, parser) overload; it also threw on files
  // with fewer than 2 rows (Last()/First() on an empty range).
  classificationData.TrainingPartition.Start = 0;
  classificationData.TrainingPartition.End = trainingPartEnd;
  classificationData.TestPartition.Start = trainingPartEnd;
  classificationData.TestPartition.End = csvFileParser.Rows;
  classificationData.Name = Path.GetFileName(path);
  return classificationData;
}
/// <summary>
/// Builds new classification problem data from the exported dataset, carrying
/// over the target variable and positive class of the old problem data.
/// </summary>
private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) {
  // fall back to the first available variable if the old target is gone
  var target = oldProblemData.TargetVariable;
  if (!context.Data.VariableNames.Contains(target)) {
    target = context.Data.VariableNames.First();
  }
  var inputs = GetDoubleInputVariables(target);
  var result = new ClassificationProblemData(ExportedDataset, inputs, target, Transformations) {
    PositiveClass = oldProblemData.PositiveClass
  };
  return result;
}
/// <summary>
/// Creates classification problem data for the given dataset, copying this
/// instance's name, description and partition boundaries.
/// </summary>
public IClassificationProblemData GenerateClassificationData(Dataset dataset) {
  IClassificationProblemData data = new ClassificationProblemData(dataset, AllowedInputVariables, TargetVariable);
  data.Name = Name;
  data.Description = Description;
  // mirror the configured train/test split
  data.TrainingPartition.Start = TrainingPartitionStart;
  data.TrainingPartition.End = TrainingPartitionEnd;
  data.TestPartition.Start = TestPartitionStart;
  data.TestPartition.End = TestPartitionEnd;
  return data;
}
/// <summary>
/// Copy constructor used by the cloning infrastructure; re-registers parameter
/// event handlers and rebuilds the class-name cache on the clone.
/// </summary>
protected ClassificationProblemData(ClassificationProblemData original, Cloner cloner)
  : base(original, cloner) {
  RegisterParameterEvents();
  // the cache is not cloned; repopulate it from the class names parameter
  classNamesCache = new List<string>();
  var classNames = ClassNamesParameter.Value;
  int row = 0;
  while (row < classNames.Rows) {
    classNamesCache.Add(classNames[row, 0]);
    row++;
  }
}
/// <summary>
/// Propagates a problem-data change: clears evaluation caches, rebuilds the
/// member solutions' problem data, and realigns all stored partitions.
/// </summary>
protected override void OnProblemDataChanged() {
  // invalidate cached evaluation results
  trainingEvaluationCache.Clear();
  testEvaluationCache.Clear();
  evaluationCache.Clear();

  // shared copy for the non-ensemble member solutions
  IClassificationProblemData sharedProblemData = new ClassificationProblemData(
    ProblemData.Dataset, ProblemData.AllowedInputVariables, ProblemData.TargetVariable);
  sharedProblemData.TrainingPartition.Start = ProblemData.TrainingPartition.Start;
  sharedProblemData.TrainingPartition.End = ProblemData.TrainingPartition.End;
  sharedProblemData.TestPartition.Start = ProblemData.TestPartition.Start;
  sharedProblemData.TestPartition.End = ProblemData.TestPartition.End;

  // nested ensembles keep the original problem data, plain solutions get the copy
  foreach (var solution in ClassificationSolutions) {
    solution.ProblemData = solution is ClassificationEnsembleSolution ? ProblemData : sharedProblemData;
  }

  foreach (var partition in trainingPartitions.Values) {
    partition.Start = ProblemData.TrainingPartition.Start;
    partition.End = ProblemData.TrainingPartition.End;
  }
  foreach (var partition in testPartitions.Values) {
    partition.Start = ProblemData.TestPartition.Start;
    partition.End = ProblemData.TestPartition.End;
  }
  base.OnProblemDataChanged();
}
/// <summary>
/// Runs the gradient boosted trees algorithm: initializes result displays,
/// iterates boosting steps until the iteration limit or cancellation, records
/// train/test losses, reports variable relevance, and optionally produces a
/// final solution (classification for logistic loss, regression otherwise).
/// </summary>
/// <param name="cancellationToken">Token used to cancel between boosting steps.</param>
protected override void Run(CancellationToken cancellationToken) {
  // Set up the algorithm
  if (SetSeedRandomly) Seed = new System.Random().Next();

  // Set up the results display
  var iterations = new IntValue(0);
  Results.Add(new Result("Iterations", iterations));
  var table = new DataTable("Qualities");
  table.Rows.Add(new DataRow("Loss (train)"));
  table.Rows.Add(new DataRow("Loss (test)"));
  Results.Add(new Result("Qualities", table));
  var curLoss = new DoubleValue();
  Results.Add(new Result("Loss (train)", curLoss));

  // init: work on a clone so the original problem data is never mutated
  var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
  var lossFunction = LossFunctionParameter.Value;
  var state = GradientBoostedTreesAlgorithmStatic.CreateGbmState(problemData, lossFunction, (uint)Seed, MaxSize, R, M, Nu);
  var updateInterval = UpdateIntervalParameter.Value.Value;
  // Loop until iteration limit reached or canceled.
  for (int i = 0; i < Iterations; i++) {
    cancellationToken.ThrowIfCancellationRequested();

    GradientBoostedTreesAlgorithmStatic.MakeStep(state);

    // iteration results (only every updateInterval-th iteration to limit UI churn)
    if (i % updateInterval == 0) {
      curLoss.Value = state.GetTrainLoss();
      table.Rows["Loss (train)"].Values.Add(curLoss.Value);
      table.Rows["Loss (test)"].Values.Add(state.GetTestLoss());
      iterations.Value = i;
    }
  }

  // final results
  iterations.Value = Iterations;
  curLoss.Value = state.GetTrainLoss();
  table.Rows["Loss (train)"].Values.Add(curLoss.Value);
  table.Rows["Loss (test)"].Values.Add(state.GetTestLoss());

  // produce variable relevance as a one-column matrix, formatted to two decimals
  var orderedImpacts = state.GetVariableRelevance().Select(t => new { name = t.Key, impact = t.Value }).ToList();
  var impacts = new DoubleMatrix();
  var matrix = impacts as IStringConvertibleMatrix;
  matrix.Rows = orderedImpacts.Count;
  matrix.RowNames = orderedImpacts.Select(x => x.name);
  matrix.Columns = 1;
  matrix.ColumnNames = new string[] { "Relative variable relevance" };
  int rowIdx = 0;
  foreach (var p in orderedImpacts) {
    matrix.SetValue(string.Format("{0:N2}", p.impact), rowIdx++, 0);
  }
  Results.Add(new Result("Variable relevance", impacts));
  Results.Add(new Result("Loss (test)", new DoubleValue(state.GetTestLoss())));

  // produce solution
  if (CreateSolution) {
    var model = state.GetModel();

    // for logistic regression we produce a classification solution
    if (lossFunction is LogisticRegressionLoss) {
      var classificationModel = new DiscriminantFunctionClassificationModel(model, new AccuracyMaximizationThresholdCalculator());
      var classificationProblemData = new ClassificationProblemData(problemData.Dataset, problemData.AllowedInputVariables, problemData.TargetVariable, problemData.Transformations);
      classificationModel.RecalculateModelParameters(classificationProblemData, classificationProblemData.TrainingIndices);
      var classificationSolution = new DiscriminantFunctionClassificationSolution(classificationModel, classificationProblemData);
      Results.Add(new Result("Solution", classificationSolution));
    } else {
      // otherwise we produce a regression solution
      Results.Add(new Result("Solution", new RegressionSolution(model, problemData)));
    }
  }
}
/// <summary>
/// Reacts to a problem-data change by flushing cached evaluations, handing all
/// member solutions fresh problem data, and synchronizing stored partitions.
/// </summary>
protected override void OnProblemDataChanged() {
  // drop all cached evaluation results; they refer to the old data
  trainingEvaluationCache.Clear();
  testEvaluationCache.Clear();
  evaluationCache.Clear();

  IClassificationProblemData memberProblemData = new ClassificationProblemData(
    ProblemData.Dataset, ProblemData.AllowedInputVariables, ProblemData.TargetVariable);
  memberProblemData.TrainingPartition.Start = ProblemData.TrainingPartition.Start;
  memberProblemData.TrainingPartition.End = ProblemData.TrainingPartition.End;
  memberProblemData.TestPartition.Start = ProblemData.TestPartition.Start;
  memberProblemData.TestPartition.End = ProblemData.TestPartition.End;

  foreach (var solution in ClassificationSolutions) {
    if (solution is ClassificationEnsembleSolution) {
      // nested ensembles keep referencing the original problem data
      solution.ProblemData = ProblemData;
    } else {
      solution.ProblemData = memberProblemData;
    }
  }

  // keep the per-solution partition ranges aligned with the new problem data
  foreach (var range in trainingPartitions.Values) {
    range.Start = ProblemData.TrainingPartition.Start;
    range.End = ProblemData.TrainingPartition.End;
  }
  foreach (var range in testPartitions.Values) {
    range.Start = ProblemData.TestPartition.Start;
    range.End = ProblemData.TestPartition.End;
  }
  base.OnProblemDataChanged();
}
/// <summary>
/// Copy constructor used by the cloning infrastructure; re-attaches parameter
/// event handlers on the clone (event subscriptions are not cloned).
/// </summary>
protected ClassificationProblemData(ClassificationProblemData original, Cloner cloner)
  : base(original, cloner) {
  RegisterParameterEvents();
}
/// <summary>
/// Imports parsed CSV values as classification problem data according to the
/// import type: optionally shuffles rows (and uniformly distributes classes),
/// filters constant input variables, and sets the train/test partitions.
/// </summary>
protected override IClassificationProblemData ImportData(string path, ClassificationImportType type, TableFileParser csvFileParser) {
  int trainingPartEnd = (csvFileParser.Rows * type.TrainingPercentage) / 100;
  List<IList> values = csvFileParser.Values;
  if (type.Shuffle) {
    values = Shuffle(values);
    if (type.UniformlyDistributeClasses) {
      // shuffling per class may move the training boundary (out parameter)
      int targetColumn = csvFileParser.VariableNames.ToList().FindIndex(x => x.Equals(type.TargetVariable));
      values = Shuffle(values, targetColumn, type.TrainingPercentage, out trainingPartEnd);
    }
  }
  Dataset dataset = new Dataset(csvFileParser.VariableNames, values);

  // turn off input variables that are constant in the training partition
  var allowedInputVars = new List<string>();
  var trainingRows = Enumerable.Range(0, trainingPartEnd);
  if (trainingRows.Count() >= 2) {
    allowedInputVars.AddRange(
      dataset.DoubleVariables.Where(
        v => dataset.GetDoubleValues(v, trainingRows).Range() > 0 && v != type.TargetVariable));
  } else {
    allowedInputVars.AddRange(dataset.DoubleVariables.Where(v => !v.Equals(type.TargetVariable)));
  }

  var classificationData = new ClassificationProblemData(dataset, allowedInputVars, type.TargetVariable);
  classificationData.TrainingPartition.Start = 0;
  classificationData.TrainingPartition.End = trainingPartEnd;
  classificationData.TestPartition.Start = trainingPartEnd;
  classificationData.TestPartition.End = csvFileParser.Rows;
  classificationData.Name = Path.GetFileName(path);
  return classificationData;
}
/// <summary>
/// Copy constructor for the cloning infrastructure; restores parameter event
/// wiring and refills the class-name cache from the cloned parameter matrix.
/// </summary>
protected ClassificationProblemData(ClassificationProblemData original, Cloner cloner)
  : base(original, cloner) {
  RegisterParameterEvents();
  // cache is transient state: rebuild it from column 0 of the class names matrix
  int rowCount = ClassNamesParameter.Value.Rows;
  classNamesCache = new List<string>(rowCount);
  for (int row = 0; row < rowCount; row++) {
    classNamesCache.Add(ClassNamesParameter.Value[row, 0]);
  }
}
/// <summary>
/// Static initializer for the shared default dataset (Wisconsin subset) and
/// the empty placeholder problem data used before real data is loaded.
/// </summary>
static ClassificationProblemData() {
  defaultDataset = new Dataset(defaultVariableNames, defaultData);
  defaultDataset.Name = "Wisconsin classification problem";
  defaultDataset.Description = "subset from to ..";
  // everything except the id column and the class label may serve as input
  defaultAllowedInputVariables = defaultVariableNames.Except(new List<string>() { "sample", "class" });
  defaultTargetVariable = "class";

  var empty = new ClassificationProblemData();
  empty.Parameters.Clear();
  empty.Name = "Empty Classification ProblemData";
  empty.Description = "This ProblemData acts as place holder before the correct problem data is loaded.";
  empty.isEmpty = true;
  // re-add every parameter with zero-sized, read-only values
  empty.Parameters.Add(new FixedValueParameter<Dataset>(DatasetParameterName, "", new Dataset()));
  empty.Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>(InputVariablesParameterName, ""));
  empty.Parameters.Add(new FixedValueParameter<IntRange>(TrainingPartitionParameterName, "", (IntRange)new IntRange(0, 0).AsReadOnly()));
  empty.Parameters.Add(new FixedValueParameter<IntRange>(TestPartitionParameterName, "", (IntRange)new IntRange(0, 0).AsReadOnly()));
  empty.Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>()));
  empty.Parameters.Add(new FixedValueParameter<StringMatrix>(ClassNamesParameterName, "", new StringMatrix(0, 0).AsReadOnly()));
  empty.Parameters.Add(new FixedValueParameter<DoubleMatrix>(ClassificationPenaltiesParameterName, "", (DoubleMatrix)new DoubleMatrix(0, 0).AsReadOnly()));
  emptyProblemData = empty;
}
/// <summary>Creates a new classification problem initialized with default problem data.</summary>
public ClassificationProblem() : base() {
  ProblemData = new ClassificationProblemData();
}