public static void Run(IClassificationProblemData problemData, IEnumerable<string> allowedInputVariables, int svmType, int kernelType, double cost, double nu, double gamma, int degree, out ISupportVectorMachineModel model, out int nSv) {
  // Trains a libSVM classifier on the training partition of the given problem data.
  // Returns the trained model and the number of support vectors via out parameters.
  var ds = problemData.Dataset;
  string target = problemData.TargetVariable;
  IEnumerable<int> trainingRows = problemData.TrainingIndices;

  // Per-class weights: each class value is weighted by the summed penalty of
  // confusing it with every other class (the diagonal is skipped).
  var labels = new List<int>();
  var penaltySums = new List<double>();
  foreach (double classValue in problemData.ClassValues) {
    double penaltySum = problemData.ClassValues
      .Where(other => !classValue.IsAlmost(other))
      .Sum(other => problemData.GetClassificationPenalty(classValue, other));
    labels.Add((int)classValue);
    penaltySums.Add(penaltySum);
  }

  var parameter = new svm_parameter {
    svm_type = svmType,
    kernel_type = kernelType,
    C = cost,
    nu = nu,
    gamma = gamma,
    cache_size = 500,
    probability = 0,
    eps = 0.001,
    degree = degree,
    shrinking = 1,
    coef0 = 0,
    weight_label = labels.ToArray(),
    weight = penaltySums.ToArray()
  };

  // Build the libSVM problem from the training rows and scale all features
  // into a common range before training.
  svm_problem problem = SupportVectorMachineUtil.CreateSvmProblem(ds, target, allowedInputVariables, trainingRows);
  RangeTransform rangeTransform = RangeTransform.Compute(problem);
  svm_problem scaledProblem = rangeTransform.Scale(problem);

  var svmModel = svm.svm_train(scaledProblem, parameter);
  nSv = svmModel.SV.Length;
  model = new SupportVectorMachineModel(svmModel, rangeTransform, target, allowedInputVariables, problemData.ClassValues);
}
public static double Calculate(IClassificationModel model, IClassificationProblemData problemData, IEnumerable<int> rows) {
  // Computes the mean misclassification penalty of the model's estimated class
  // values over the given rows. Returns NaN when there are no estimations.
  // FIX: the enumerator obtained from GetEstimatedClassValues is IDisposable and
  // was previously never disposed; wrap it in a using statement.
  using (var estimations = model.GetEstimatedClassValues(problemData.Dataset, rows).GetEnumerator()) {
    if (!estimations.MoveNext()) return double.NaN;
    var penalty = 0.0;
    var count = 0;
    foreach (var r in rows) {
      var actualClass = problemData.Dataset.GetDoubleValue(problemData.TargetVariable, r);
      penalty += problemData.GetClassificationPenalty(actualClass, estimations.Current);
      estimations.MoveNext();
      count++;
    }
    return penalty / count;
  }
}
public static double Calculate(IClassificationModel model, IClassificationProblemData problemData, IEnumerable<int> rows) {
  // Averages the misclassification penalty between the actual class of each row
  // and the model's estimated class value. NaN is returned for an empty estimation
  // sequence.
  // FIX: dispose the generic enumerator (IEnumerator<T> : IDisposable); the
  // original leaked it, which matters for lazily evaluated estimation sequences.
  using (var estimations = model.GetEstimatedClassValues(problemData.Dataset, rows).GetEnumerator()) {
    if (!estimations.MoveNext()) {
      return (double.NaN);
    }
    var penalty = 0.0;
    var count = 0;
    foreach (var r in rows) {
      var actualClass = problemData.Dataset.GetDoubleValue(problemData.TargetVariable, r);
      penalty += problemData.GetClassificationPenalty(actualClass, estimations.Current);
      estimations.MoveNext();
      count++;
    }
    return (penalty / count);
  }
}
public ClassificationProblemData(IClassificationProblemData classificationProblemData)
  : this(classificationProblemData.Dataset, classificationProblemData.AllowedInputVariables, classificationProblemData.TargetVariable) {
  // Copy constructor: replicates partitions, class names, the positive class and
  // the misclassification penalty matrix of the given problem data.
  TrainingPartition.Start = classificationProblemData.TrainingPartition.Start;
  TrainingPartition.End = classificationProblemData.TrainingPartition.End;
  TestPartition.Start = classificationProblemData.TestPartition.Start;
  TestPartition.End = classificationProblemData.TestPartition.End;
  for (int i = 0; i < classificationProblemData.ClassNames.Count(); i++) {
    ClassNamesParameter.Value[i, 0] = classificationProblemData.ClassNames.ElementAt(i);
  }
  // FIX: the positive class depends on the class names and therefore must only be
  // set AFTER the class names parameter has been copied (it was previously
  // assigned before the names, against still-default class names).
  PositiveClass = classificationProblemData.PositiveClass;
  for (int i = 0; i < Classes; i++) {
    for (int j = 0; j < Classes; j++) {
      ClassificationPenaltiesParameter.Value[i, j] = classificationProblemData.GetClassificationPenalty(ClassValuesCache[i], ClassValuesCache[j]);
    }
  }
}
public ClassificationProblemData(IClassificationProblemData classificationProblemData)
  : this(classificationProblemData.Dataset, classificationProblemData.AllowedInputVariables, classificationProblemData.TargetVariable) {
  // Copy constructor: mirrors the partitions, class names, positive class and the
  // full misclassification penalty matrix of the source problem data.
  TrainingPartition.Start = classificationProblemData.TrainingPartition.Start;
  TrainingPartition.End = classificationProblemData.TrainingPartition.End;
  TestPartition.Start = classificationProblemData.TestPartition.Start;
  TestPartition.End = classificationProblemData.TestPartition.End;

  for (int idx = 0; idx < classificationProblemData.ClassNames.Count(); idx++) {
    ClassNamesParameter.Value[idx, 0] = classificationProblemData.ClassNames.ElementAt(idx);
  }

  // mkommend: The positive class depends on the class names and as a result must
  // only be set after the class names parameter.
  PositiveClass = classificationProblemData.PositiveClass;

  for (int row = 0; row < Classes; row++) {
    for (int col = 0; col < Classes; col++) {
      ClassificationPenaltiesParameter.Value[row, col] =
        classificationProblemData.GetClassificationPenalty(ClassValuesCache[row], ClassValuesCache[col]);
    }
  }
}
/// <summary>
/// Determines, for a discriminant-function classifier, the class values ordered by
/// the median of their estimated values, and one threshold per class boundary chosen
/// to minimize the total misclassification penalty on the given sample.
/// </summary>
/// <param name="problemData">Supplies the misclassification penalty matrix.</param>
/// <param name="estimatedValues">Continuous model outputs, one per sample.</param>
/// <param name="targetClassValues">Actual class value of each sample, aligned with estimatedValues.</param>
/// <param name="classValues">Out: class values sorted by the median estimated value of their samples.</param>
/// <param name="thresholds">Out: thresholds[0] is -infinity; thresholds[i] separates classValues[i-1] from classValues[i].</param>
public static void CalculateThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
  const int slices = 100; // number of candidate thresholds probed between min and max estimated value
  const double minThresholdInc = 10e-5; // necessary to prevent infinite loop when maxEstimated - minEstimated is effectively zero (constant model)
  List<double> estimatedValuesList = estimatedValues.ToList();
  double maxEstimatedValue = estimatedValuesList.Max();
  double minEstimatedValue = estimatedValuesList.Min();
  double thresholdIncrement = Math.Max((maxEstimatedValue - minEstimatedValue) / slices, minThresholdInc);
  // pair each estimated value with its actual class and sort by estimated value
  var estimatedAndTargetValuePairs =
    estimatedValuesList.Zip(targetClassValues, (x, y) => new { EstimatedValue = x, TargetClassValue = y })
    .OrderBy(x => x.EstimatedValue).ToList();
  // order the classes by the median estimated value of their samples
  classValues = estimatedAndTargetValuePairs.GroupBy(x => x.TargetClassValue)
    .Select(x => new { Median = x.Select(y => y.EstimatedValue).Median(), Class = x.Key })
    .OrderBy(x => x.Median).Select(x => x.Class).ToArray();

  int nClasses = classValues.Length;
  thresholds = new double[nClasses];
  thresholds[0] = double.NegativeInfinity;

  // incrementally calculate accuracy of all possible thresholds
  for (int i = 1; i < thresholds.Length; i++) {
    double lowerThreshold = thresholds[i - 1];
    double actualThreshold = Math.Max(lowerThreshold, minEstimatedValue);
    double lowestBestThreshold = double.NaN;
    double highestBestThreshold = double.NaN;
    double bestClassificationScore = double.PositiveInfinity;
    bool seriesOfEqualClassificationScores = false;

    // sweep candidate thresholds from the lower boundary up to the maximum
    // estimated value and score each candidate by its total penalty
    while (actualThreshold < maxEstimatedValue) {
      double classificationScore = 0.0;
      foreach (var pair in estimatedAndTargetValuePairs) {
        //all positives
        if (pair.TargetClassValue.IsAlmost(classValues[i - 1])) {
          if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue <= actualThreshold) {
            //true positive
            classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, pair.TargetClassValue);
          } else {
            //false negative
            classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i]);
          }
        }
        //all negatives
        else {
          //false positive
          if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue <= actualThreshold) {
            classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i - 1]);
          } else if (pair.EstimatedValue <= lowerThreshold) {
            // NOTE(review): classValues[i - 2] is only reachable for i >= 2, because for
            // i == 1 the lower threshold is -infinity and no finite estimate can be <= it.
            classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i - 2]);
          } else if (pair.EstimatedValue > actualThreshold) {
            if (pair.TargetClassValue < classValues[i - 1]) //negative in wrong class, consider upper class
            {
              classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i]);
            } else //true negative, must be optimized by the other thresholds
            {
              classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, pair.TargetClassValue);
            }
          }
        }
      }
      //new best classification score found
      if (classificationScore < bestClassificationScore) {
        bestClassificationScore = classificationScore;
        lowestBestThreshold = actualThreshold;
        highestBestThreshold = actualThreshold;
        seriesOfEqualClassificationScores = true;
      }
      //equal classification score => if seriesOfEqualClassificationScores == true, update the highest threshold
      else if (Math.Abs(classificationScore - bestClassificationScore) < double.Epsilon && seriesOfEqualClassificationScores) {
        highestBestThreshold = actualThreshold;
      }
      //worse classification score found: reset seriesOfEqualClassificationScores
      else {
        seriesOfEqualClassificationScores = false;
      }
      actualThreshold += thresholdIncrement;
    }
    //scale lowest and highest found optimal threshold according to the misclassification matrix
    double falseNegativePenalty = problemData.GetClassificationPenalty(classValues[i], classValues[i - 1]);
    double falsePositivePenalty = problemData.GetClassificationPenalty(classValues[i - 1], classValues[i]);
    thresholds[i] = (lowestBestThreshold * falsePositivePenalty + highestBestThreshold * falseNegativePenalty) / (falseNegativePenalty + falsePositivePenalty);
  }
}
/// <summary>
/// Determines the class values ordered by the median of their estimated values and
/// one threshold per class boundary, chosen so that the total misclassification
/// penalty over the given sample is minimized.
/// </summary>
/// <param name="problemData">Supplies the misclassification penalty matrix.</param>
/// <param name="estimatedValues">Continuous model outputs, one per sample.</param>
/// <param name="targetClassValues">Actual class value of each sample, aligned with estimatedValues.</param>
/// <param name="classValues">Out: class values sorted by the median estimated value of their samples.</param>
/// <param name="thresholds">Out: thresholds[0] is -infinity; thresholds[i] separates classValues[i-1] from classValues[i].</param>
public static void CalculateThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
  const int slices = 100; // number of candidate thresholds probed between min and max estimated value
  const double minThresholdInc = 10e-5; // necessary to prevent infinite loop when maxEstimated - minEstimated is effectively zero (constant model)
  List<double> estimatedValuesList = estimatedValues.ToList();
  double maxEstimatedValue = estimatedValuesList.Max();
  double minEstimatedValue = estimatedValuesList.Min();
  double thresholdIncrement = Math.Max((maxEstimatedValue - minEstimatedValue) / slices, minThresholdInc);
  // pair each estimated value with its actual class and sort by estimated value
  var estimatedAndTargetValuePairs =
    estimatedValuesList.Zip(targetClassValues, (x, y) => new { EstimatedValue = x, TargetClassValue = y })
    .OrderBy(x => x.EstimatedValue).ToList();
  // order the classes by the median estimated value of their samples
  classValues = estimatedAndTargetValuePairs.GroupBy(x => x.TargetClassValue)
    .Select(x => new { Median = x.Select(y => y.EstimatedValue).Median(), Class = x.Key })
    .OrderBy(x => x.Median).Select(x => x.Class).ToArray();

  int nClasses = classValues.Length;
  thresholds = new double[nClasses];
  thresholds[0] = double.NegativeInfinity;

  // incrementally calculate accuracy of all possible thresholds
  for (int i = 1; i < thresholds.Length; i++) {
    double lowerThreshold = thresholds[i - 1];
    double actualThreshold = Math.Max(lowerThreshold, minEstimatedValue);
    double lowestBestThreshold = double.NaN;
    double highestBestThreshold = double.NaN;
    double bestClassificationScore = double.PositiveInfinity;
    bool seriesOfEqualClassificationScores = false;

    // sweep candidate thresholds from the lower boundary up to the maximum
    // estimated value and score each candidate by its total penalty
    while (actualThreshold < maxEstimatedValue) {
      double classificationScore = 0.0;
      foreach (var pair in estimatedAndTargetValuePairs) {
        //all positives
        if (pair.TargetClassValue.IsAlmost(classValues[i - 1])) {
          if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue <= actualThreshold)
            //true positive
            classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, pair.TargetClassValue);
          else
            //false negative
            classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i]);
        }
        //all negatives
        else {
          //false positive
          if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue <= actualThreshold)
            classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i - 1]);
          else if (pair.EstimatedValue <= lowerThreshold)
            // NOTE(review): classValues[i - 2] is only reachable for i >= 2, because for
            // i == 1 the lower threshold is -infinity and no finite estimate can be <= it.
            classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i - 2]);
          else if (pair.EstimatedValue > actualThreshold) {
            if (pair.TargetClassValue < classValues[i - 1]) //negative in wrong class, consider upper class
              classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i]);
            else //true negative, must be optimized by the other thresholds
              classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, pair.TargetClassValue);
          }
        }
      }
      //new best classification score found
      if (classificationScore < bestClassificationScore) {
        bestClassificationScore = classificationScore;
        lowestBestThreshold = actualThreshold;
        highestBestThreshold = actualThreshold;
        seriesOfEqualClassificationScores = true;
      }
      //equal classification score => if seriesOfEqualClassificationScores == true, update the highest threshold
      else if (Math.Abs(classificationScore - bestClassificationScore) < double.Epsilon && seriesOfEqualClassificationScores)
        highestBestThreshold = actualThreshold;
      //worse classification score found: reset seriesOfEqualClassificationScores
      else
        seriesOfEqualClassificationScores = false;
      actualThreshold += thresholdIncrement;
    }
    //scale lowest and highest found optimal threshold according to the misclassification matrix
    double falseNegativePenalty = problemData.GetClassificationPenalty(classValues[i], classValues[i - 1]);
    double falsePositivePenalty = problemData.GetClassificationPenalty(classValues[i - 1], classValues[i]);
    thresholds[i] = (lowestBestThreshold * falsePositivePenalty + highestBestThreshold * falseNegativePenalty) / (falseNegativePenalty + falsePositivePenalty);
  }
}
public ClassificationProblemData(IClassificationProblemData classificationProblemData)
  : this(classificationProblemData.Dataset, classificationProblemData.AllowedInputVariables, classificationProblemData.TargetVariable) {
  // Copy constructor: replicates partitions, class names, the positive class and
  // the misclassification penalty matrix of the given problem data.
  TrainingPartition.Start = classificationProblemData.TrainingPartition.Start;
  TrainingPartition.End = classificationProblemData.TrainingPartition.End;
  TestPartition.Start = classificationProblemData.TestPartition.Start;
  TestPartition.End = classificationProblemData.TestPartition.End;
  for (int i = 0; i < classificationProblemData.ClassNames.Count(); i++)
    ClassNamesParameter.Value[i, 0] = classificationProblemData.ClassNames.ElementAt(i);
  // FIX: the positive class depends on the class names and therefore must only be
  // set AFTER the class names parameter has been copied (it was previously
  // assigned before the names, against still-default class names).
  PositiveClass = classificationProblemData.PositiveClass;
  for (int i = 0; i < Classes; i++) {
    for (int j = 0; j < Classes; j++) {
      ClassificationPenaltiesParameter.Value[i, j] = classificationProblemData.GetClassificationPenalty(ClassValuesCache[i], ClassValuesCache[j]);
    }
  }
}