Пример #1
0
        /// <summary>
        /// Trains a support vector machine on the training rows of <paramref name="problemData"/> and returns the
        /// resulting model together with its number of support vectors.
        /// </summary>
        /// <param name="problemData">Supplies the dataset, target variable, training indices, class values and classification penalties.</param>
        /// <param name="allowedInputVariables">Input variables handed to the SVM problem builder and to the resulting model.</param>
        /// <param name="svmType">Value assigned to <c>svm_parameter.svm_type</c>.</param>
        /// <param name="kernelType">Value assigned to <c>svm_parameter.kernel_type</c>.</param>
        /// <param name="cost">Value assigned to <c>svm_parameter.C</c>.</param>
        /// <param name="nu">Value assigned to <c>svm_parameter.nu</c>.</param>
        /// <param name="gamma">Value assigned to <c>svm_parameter.gamma</c>.</param>
        /// <param name="degree">Value assigned to <c>svm_parameter.degree</c>.</param>
        /// <param name="model">Receives the trained model wrapped together with the range transform used for scaling.</param>
        /// <param name="nSv">Receives the length of the trained model's <c>SV</c> array.</param>
        public static void Run(IClassificationProblemData problemData, IEnumerable <string> allowedInputVariables,
                               int svmType, int kernelType, double cost, double nu, double gamma, int degree,
                               out ISupportVectorMachineModel model, out int nSv)
        {
            var               dataset        = problemData.Dataset;
            string            targetVariable = problemData.TargetVariable;
            IEnumerable <int> rows           = problemData.TrainingIndices;

            // Fixed training settings; only the values passed in by the caller vary.
            svm_parameter parameter = new svm_parameter();
            parameter.svm_type    = svmType;
            parameter.kernel_type = kernelType;
            parameter.C           = cost;
            parameter.nu          = nu;
            parameter.gamma       = gamma;
            parameter.cache_size  = 500;
            parameter.probability = 0;
            parameter.eps         = 0.001;
            parameter.degree      = degree;
            parameter.shrinking   = 1;
            parameter.coef0       = 0;

            // One weight per class: the sum of the penalties for confusing that class with every other class.
            var labels       = new List <int>();
            var classWeights = new List <double>();

            foreach (double classValue in problemData.ClassValues)
            {
                double penaltySum = 0.0;
                foreach (double other in problemData.ClassValues)
                {
                    // Skip the class itself; only misclassifications contribute.
                    if (classValue.IsAlmost(other))
                    {
                        continue;
                    }
                    penaltySum += problemData.GetClassificationPenalty(classValue, other);
                }
                labels.Add((int)classValue);
                classWeights.Add(penaltySum);
            }
            parameter.weight_label = labels.ToArray();
            parameter.weight       = classWeights.ToArray();

            // Build the problem, compute the range transform from it, and train on the scaled problem.
            svm_problem    rawProblem = SupportVectorMachineUtil.CreateSvmProblem(dataset, targetVariable, allowedInputVariables, rows);
            RangeTransform transform  = RangeTransform.Compute(rawProblem);
            svm_problem    scaled     = transform.Scale(rawProblem);
            var            trained    = svm.svm_train(scaled, parameter);

            nSv = trained.SV.Length;

            model = new SupportVectorMachineModel(trained, transform, targetVariable, allowedInputVariables, problemData.ClassValues);
        }
    /// <summary>
    /// Calculates the average classification penalty of <paramref name="model"/> over the given rows.
    /// </summary>
    /// <param name="model">Model whose estimated class values are compared against the actual target values.</param>
    /// <param name="problemData">Supplies the dataset, the target variable and the penalty for each (actual, estimated) class pair.</param>
    /// <param name="rows">Row indices to evaluate; passed to the model and also enumerated here.</param>
    /// <returns>
    /// The summed penalty divided by the number of rows, or <see cref="double.NaN"/> when the model yields no estimations.
    /// </returns>
    public static double Calculate(IClassificationModel model, IClassificationProblemData problemData, IEnumerable<int> rows) {
      // The enumerator is disposable; 'using' releases it on every exit path (early return, exception, normal end).
      using (var estimations = model.GetEstimatedClassValues(problemData.Dataset, rows).GetEnumerator()) {
        if (!estimations.MoveNext()) return double.NaN;

        var penalty = 0.0;
        var count = 0;
        // NOTE(review): assumes the model yields exactly one estimation per row, in row order - confirm against implementations.
        foreach (var r in rows) {
          var actualClass = problemData.Dataset.GetDoubleValue(problemData.TargetVariable, r);
          penalty += problemData.GetClassificationPenalty(actualClass, estimations.Current);
          estimations.MoveNext();
          count++;
        }
        return penalty / count;
      }
    }
        /// <summary>
        /// Calculates the average classification penalty of <paramref name="model"/> over the given rows.
        /// </summary>
        /// <param name="model">Model whose estimated class values are compared against the actual target values.</param>
        /// <param name="problemData">Supplies the dataset, the target variable and the penalty for each (actual, estimated) class pair.</param>
        /// <param name="rows">Row indices to evaluate; passed to the model and also enumerated here.</param>
        /// <returns>
        /// The summed penalty divided by the number of rows, or <see cref="double.NaN"/> when the model yields no estimations.
        /// </returns>
        public static double Calculate(IClassificationModel model, IClassificationProblemData problemData, IEnumerable <int> rows)
        {
            // The enumerator is disposable; 'using' releases it on every exit path (early return, exception, normal end).
            using (var estimations = model.GetEstimatedClassValues(problemData.Dataset, rows).GetEnumerator())
            {
                if (!estimations.MoveNext())
                {
                    return(double.NaN);
                }

                var penalty = 0.0;
                var count   = 0;

                // NOTE(review): assumes the model yields exactly one estimation per row, in row order - confirm against implementations.
                foreach (var r in rows)
                {
                    var actualClass = problemData.Dataset.GetDoubleValue(problemData.TargetVariable, r);
                    penalty += problemData.GetClassificationPenalty(actualClass, estimations.Current);
                    estimations.MoveNext();
                    count++;
                }
                return(penalty / count);
            }
        }
Пример #4
0
        /// <summary>
        /// Creates problem data from the dataset, allowed input variables and target variable of
        /// <paramref name="classificationProblemData"/>, then copies its training/test partitions, class names,
        /// positive class and classification penalties.
        /// </summary>
        /// <param name="classificationProblemData">The problem data to copy from.</param>
        public ClassificationProblemData(IClassificationProblemData classificationProblemData)
            : this(classificationProblemData.Dataset, classificationProblemData.AllowedInputVariables, classificationProblemData.TargetVariable)
        {
            TrainingPartition.Start = classificationProblemData.TrainingPartition.Start;
            TrainingPartition.End   = classificationProblemData.TrainingPartition.End;
            TestPartition.Start     = classificationProblemData.TestPartition.Start;
            TestPartition.End       = classificationProblemData.TestPartition.End;

            // Copy the class names in a single pass instead of calling Count()/ElementAt(i) for every index.
            int nameIndex = 0;
            foreach (var className in classificationProblemData.ClassNames)
            {
                ClassNamesParameter.Value[nameIndex, 0] = className;
                nameIndex++;
            }

            // The positive class depends on the class names, so it must only be set after the names have been copied.
            PositiveClass = classificationProblemData.PositiveClass;

            // Copy the full penalty matrix, looked up by class value in the source problem data.
            for (int i = 0; i < Classes; i++)
            {
                for (int j = 0; j < Classes; j++)
                {
                    ClassificationPenaltiesParameter.Value[i, j] = classificationProblemData.GetClassificationPenalty(ClassValuesCache[i], ClassValuesCache[j]);
                }
            }
        }
        /// <summary>
        /// Creates problem data from the dataset, allowed input variables and target variable of
        /// <paramref name="classificationProblemData"/>, then copies its training/test partitions, class names,
        /// positive class and classification penalties.
        /// </summary>
        /// <param name="classificationProblemData">The problem data to copy from.</param>
        public ClassificationProblemData(IClassificationProblemData classificationProblemData)
            : this(classificationProblemData.Dataset, classificationProblemData.AllowedInputVariables, classificationProblemData.TargetVariable)
        {
            // Partition boundaries.
            TrainingPartition.Start = classificationProblemData.TrainingPartition.Start;
            TrainingPartition.End   = classificationProblemData.TrainingPartition.End;
            TestPartition.Start     = classificationProblemData.TestPartition.Start;
            TestPartition.End       = classificationProblemData.TestPartition.End;

            // Class names go into column 0 of the class names matrix, one row per name.
            int nameRow = 0;
            foreach (var className in classificationProblemData.ClassNames)
            {
                ClassNamesParameter.Value[nameRow, 0] = className;
                nameRow++;
            }

            // mkommend: the positive class depends on the class names and therefore must only be set after the class names parameter.
            PositiveClass = classificationProblemData.PositiveClass;

            // Penalty matrix, looked up by class value in the source problem data.
            for (int actual = 0; actual < Classes; actual++)
            {
                for (int estimated = 0; estimated < Classes; estimated++)
                {
                    ClassificationPenaltiesParameter.Value[actual, estimated] =
                        classificationProblemData.GetClassificationPenalty(ClassValuesCache[actual], ClassValuesCache[estimated]);
                }
            }
        }
        /// <summary>
        /// Computes an ordering of the class values and one threshold per class for the given estimated values,
        /// by scanning candidate thresholds between the smallest and largest estimated value and keeping those
        /// with the lowest summed classification penalty.
        /// </summary>
        /// <param name="problemData">Used only to look up classification penalties via GetClassificationPenalty.</param>
        /// <param name="estimatedValues">Estimated values; materialized into a list once.</param>
        /// <param name="targetClassValues">Target class values, paired position-wise with the estimated values via Zip.</param>
        /// <param name="classValues">Receives the distinct target class values, ordered by the Median() of their estimated values.</param>
        /// <param name="thresholds">
        /// Receives one threshold per class value; element 0 is negative infinity, the others are the result of the scan.
        /// </param>
        /// <remarks>
        /// NOTE(review): Max()/Min() are called on the materialized list without an emptiness check - presumably
        /// these throw for empty input; confirm callers never pass an empty sequence.
        /// NOTE(review): if the scan loop never executes for a class (start threshold not below the maximum estimated
        /// value), the best thresholds stay NaN and so does the resulting threshold.
        /// </remarks>
        public static void CalculateThresholds(IClassificationProblemData problemData, IEnumerable <double> estimatedValues, IEnumerable <double> targetClassValues, out double[] classValues, out double[] thresholds)
        {
            const int     slices                       = 100;
            const double  minThresholdInc              = 10e-5; // necessary to prevent infinite loop when maxEstimated - minEstimated is effectively zero (constant model); note 10e-5 == 1e-4
            List <double> estimatedValuesList          = estimatedValues.ToList();
            double        maxEstimatedValue            = estimatedValuesList.Max();
            double        minEstimatedValue            = estimatedValuesList.Min();
            // step width of the scan: 1/slices of the estimated value range, but never below minThresholdInc
            double        thresholdIncrement           = Math.Max((maxEstimatedValue - minEstimatedValue) / slices, minThresholdInc);
            // (estimated, target) pairs sorted ascending by estimated value
            var           estimatedAndTargetValuePairs =
                estimatedValuesList.Zip(targetClassValues, (x, y) => new { EstimatedValue = x, TargetClassValue = y })
                .OrderBy(x => x.EstimatedValue).ToList();

            // order the classes by the median of the estimated values of their samples
            classValues = estimatedAndTargetValuePairs.GroupBy(x => x.TargetClassValue)
                          .Select(x => new { Median = x.Select(y => y.EstimatedValue).Median(), Class = x.Key })
                          .OrderBy(x => x.Median).Select(x => x.Class).ToArray();

            int nClasses = classValues.Length;

            thresholds    = new double[nClasses];
            thresholds[0] = double.NegativeInfinity;

            // incrementally calculate the penalty score of all candidate thresholds, one class boundary at a time
            for (int i = 1; i < thresholds.Length; i++)
            {
                double lowerThreshold                    = thresholds[i - 1];
                // start scanning at the previous threshold, but not below the smallest estimated value
                double actualThreshold                   = Math.Max(lowerThreshold, minEstimatedValue);
                double lowestBestThreshold               = double.NaN;
                double highestBestThreshold              = double.NaN;
                double bestClassificationScore           = double.PositiveInfinity;
                bool   seriesOfEqualClassificationScores = false;

                while (actualThreshold < maxEstimatedValue)
                {
                    // summed penalty over all pairs for the interval (lowerThreshold, actualThreshold]
                    double classificationScore = 0.0;

                    foreach (var pair in estimatedAndTargetValuePairs)
                    {
                        //all positives (samples whose target is the class below the threshold being searched)
                        if (pair.TargetClassValue.IsAlmost(classValues[i - 1]))
                        {
                            if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue <= actualThreshold)
                            {
                                //true positive
                                classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, pair.TargetClassValue);
                            }
                            else
                            {
                                //false negative
                                classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i]);
                            }
                        }
                        //all negatives
                        else
                        {
                            //false positive
                            if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue <= actualThreshold)
                            {
                                classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i - 1]);
                            }
                            else if (pair.EstimatedValue <= lowerThreshold)
                            {
                                // NOTE(review): for i == 1 the index i - 2 is -1; this branch is then only reachable when
                                // EstimatedValue <= negative infinity - confirm such values cannot occur.
                                classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i - 2]);
                            }
                            else if (pair.EstimatedValue > actualThreshold)
                            {
                                if (pair.TargetClassValue < classValues[i - 1]) //negative in wrong class, consider upper class
                                {
                                    classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i]);
                                }
                                else //true negative, must be optimized by the other thresholds
                                {
                                    classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, pair.TargetClassValue);
                                }
                            }
                        }
                    }

                    //new best classification score found => restart the range of best thresholds
                    if (classificationScore < bestClassificationScore)
                    {
                        bestClassificationScore           = classificationScore;
                        lowestBestThreshold               = actualThreshold;
                        highestBestThreshold              = actualThreshold;
                        seriesOfEqualClassificationScores = true;
                    }
                    //equal classification scores => if seriesOfEqualClassificationScores == true update highest threshold
                    else if (Math.Abs(classificationScore - bestClassificationScore) < double.Epsilon && seriesOfEqualClassificationScores)
                    {
                        highestBestThreshold = actualThreshold;
                    }
                    //worse classification score found => reset seriesOfEqualClassificationScores
                    else
                    {
                        seriesOfEqualClassificationScores = false;
                    }

                    actualThreshold += thresholdIncrement;
                }
                //weight the lowest and highest found optimal threshold according to the misclassification matrix
                // NOTE(review): if both penalties are zero the divisor is zero - confirm the penalty matrix rules this out
                double falseNegativePenalty = problemData.GetClassificationPenalty(classValues[i], classValues[i - 1]);
                double falsePositivePenalty = problemData.GetClassificationPenalty(classValues[i - 1], classValues[i]);
                thresholds[i] = (lowestBestThreshold * falsePositivePenalty + highestBestThreshold * falseNegativePenalty) / (falseNegativePenalty + falsePositivePenalty);
            }
        }
    /// <summary>
    /// Computes an ordering of the class values and one threshold per class for the given estimated values,
    /// by scanning candidate thresholds between the smallest and largest estimated value and keeping those
    /// with the lowest summed classification penalty.
    /// </summary>
    /// <param name="problemData">Used only to look up classification penalties via GetClassificationPenalty.</param>
    /// <param name="estimatedValues">Estimated values; materialized into a list once.</param>
    /// <param name="targetClassValues">Target class values, paired position-wise with the estimated values via Zip.</param>
    /// <param name="classValues">Receives the distinct target class values, ordered by the Median() of their estimated values.</param>
    /// <param name="thresholds">
    /// Receives one threshold per class value; element 0 is negative infinity, the others are the result of the scan.
    /// </param>
    /// <remarks>
    /// NOTE(review): Max()/Min() are called on the materialized list without an emptiness check - presumably
    /// these throw for empty input; confirm callers never pass an empty sequence.
    /// NOTE(review): if the scan loop never executes for a class (start threshold not below the maximum estimated
    /// value), the best thresholds stay NaN and so does the resulting threshold.
    /// </remarks>
    public static void CalculateThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
      const int slices = 100;
      const double minThresholdInc = 10e-5; // necessary to prevent infinite loop when maxEstimated - minEstimated is effectively zero (constant model); note 10e-5 == 1e-4
      List<double> estimatedValuesList = estimatedValues.ToList();
      double maxEstimatedValue = estimatedValuesList.Max();
      double minEstimatedValue = estimatedValuesList.Min();
      // step width of the scan: 1/slices of the estimated value range, but never below minThresholdInc
      double thresholdIncrement = Math.Max((maxEstimatedValue - minEstimatedValue) / slices, minThresholdInc);
      // (estimated, target) pairs sorted ascending by estimated value
      var estimatedAndTargetValuePairs =
        estimatedValuesList.Zip(targetClassValues, (x, y) => new { EstimatedValue = x, TargetClassValue = y })
        .OrderBy(x => x.EstimatedValue).ToList();

      // order the classes by the median of the estimated values of their samples
      classValues = estimatedAndTargetValuePairs.GroupBy(x => x.TargetClassValue)
        .Select(x => new { Median = x.Select(y => y.EstimatedValue).Median(), Class = x.Key })
        .OrderBy(x => x.Median).Select(x => x.Class).ToArray();

      int nClasses = classValues.Length;
      thresholds = new double[nClasses];
      thresholds[0] = double.NegativeInfinity;

      // incrementally calculate the penalty score of all candidate thresholds, one class boundary at a time
      for (int i = 1; i < thresholds.Length; i++) {
        double lowerThreshold = thresholds[i - 1];
        // start scanning at the previous threshold, but not below the smallest estimated value
        double actualThreshold = Math.Max(lowerThreshold, minEstimatedValue);
        double lowestBestThreshold = double.NaN;
        double highestBestThreshold = double.NaN;
        double bestClassificationScore = double.PositiveInfinity;
        bool seriesOfEqualClassificationScores = false;

        while (actualThreshold < maxEstimatedValue) {
          // summed penalty over all pairs for the interval (lowerThreshold, actualThreshold]
          double classificationScore = 0.0;

          foreach (var pair in estimatedAndTargetValuePairs) {
            //all positives (samples whose target is the class below the threshold being searched)
            if (pair.TargetClassValue.IsAlmost(classValues[i - 1])) {
              if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue <= actualThreshold)
                //true positive
                classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, pair.TargetClassValue);
              else
                //false negative
                classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i]);
            }
            //all negatives
            else {
              //false positive
              if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue <= actualThreshold)
                classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i - 1]);
              else if (pair.EstimatedValue <= lowerThreshold)
                // NOTE(review): for i == 1 the index i - 2 is -1; this branch is then only reachable when
                // EstimatedValue <= negative infinity - confirm such values cannot occur.
                classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i - 2]);
              else if (pair.EstimatedValue > actualThreshold) {
                if (pair.TargetClassValue < classValues[i - 1]) //negative in wrong class, consider upper class
                  classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, classValues[i]);
                else //true negative, must be optimized by the other thresholds
                  classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, pair.TargetClassValue);
              }
            }
          }

          //new best classification score found => restart the range of best thresholds
          if (classificationScore < bestClassificationScore) {
            bestClassificationScore = classificationScore;
            lowestBestThreshold = actualThreshold;
            highestBestThreshold = actualThreshold;
            seriesOfEqualClassificationScores = true;
          }
          //equal classification scores => if seriesOfEqualClassificationScores == true update highest threshold
          else if (Math.Abs(classificationScore - bestClassificationScore) < double.Epsilon && seriesOfEqualClassificationScores)
            highestBestThreshold = actualThreshold;
          //worse classification score found => reset seriesOfEqualClassificationScores
          else seriesOfEqualClassificationScores = false;

          actualThreshold += thresholdIncrement;
        }
        //weight the lowest and highest found optimal threshold according to the misclassification matrix
        // NOTE(review): if both penalties are zero the divisor is zero - confirm the penalty matrix rules this out
        double falseNegativePenalty = problemData.GetClassificationPenalty(classValues[i], classValues[i - 1]);
        double falsePositivePenalty = problemData.GetClassificationPenalty(classValues[i - 1], classValues[i]);
        thresholds[i] = (lowestBestThreshold * falsePositivePenalty + highestBestThreshold * falseNegativePenalty) / (falseNegativePenalty + falsePositivePenalty);
      }
    }
    /// <summary>
    /// Creates problem data from the dataset, allowed input variables and target variable of
    /// <paramref name="classificationProblemData"/>, then copies its training/test partitions, class names,
    /// positive class and classification penalties.
    /// </summary>
    /// <param name="classificationProblemData">The problem data to copy from.</param>
    public ClassificationProblemData(IClassificationProblemData classificationProblemData)
      : this(classificationProblemData.Dataset, classificationProblemData.AllowedInputVariables, classificationProblemData.TargetVariable) {
      TrainingPartition.Start = classificationProblemData.TrainingPartition.Start;
      TrainingPartition.End = classificationProblemData.TrainingPartition.End;
      TestPartition.Start = classificationProblemData.TestPartition.Start;
      TestPartition.End = classificationProblemData.TestPartition.End;

      // Copy the class names in a single pass instead of calling Count()/ElementAt(i) for every index.
      int nameIndex = 0;
      foreach (var className in classificationProblemData.ClassNames) {
        ClassNamesParameter.Value[nameIndex, 0] = className;
        nameIndex++;
      }

      // The positive class depends on the class names, so it must only be set after the names have been copied.
      PositiveClass = classificationProblemData.PositiveClass;

      // Copy the full penalty matrix, looked up by class value in the source problem data.
      for (int i = 0; i < Classes; i++) {
        for (int j = 0; j < Classes; j++) {
          ClassificationPenaltiesParameter.Value[i, j] = classificationProblemData.GetClassificationPenalty(ClassValuesCache[i], ClassValuesCache[j]);
        }
      }
    }