Example #1
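        // Oversamples the minority class: adds up to (Majority.Length - Minority.Length) synthetic
        // examples, capped at Minority.Length * MinorityClassMaxOversampling. Relies on an
        // OverSample(TrainingSet, SamplingParams) overload (not shown here) that presumably
        // produces one synthetic minority-class example per call.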
        public TrainingExample[] OverSample(TrainingExample[] trainingExamples, SamplingParams samplingParams)
        {
            if (!samplingParams.NeedSampling)
                return trainingExamples;

            var trainingSet = new TrainingSet(trainingExamples);
            var delta = Math.Min(trainingSet.Majority.Length - trainingSet.Minority.Length, trainingSet.Minority.Length * samplingParams.MinorityClassMaxOversampling);
            var additionalSamples = Enumerable.Range(0, delta).Select(x => OverSample(trainingSet, samplingParams)).ToArray();
            return trainingExamples.Concat(additionalSamples).RandomShuffle().ToArray();
        }
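        // Wraps a trained classifier together with its resubstitution error,
        // i.e. the misclassification rate measured on the training set itself.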
        public static ClassificationAlgorithmBuildResult Create(IClassificationAlgorithm classificationAlgorithm, TrainingExample[] trainingSet)
        {
            var errorsCount = trainingSet.Count(x => classificationAlgorithm.Classify(x.Features).PredictedClass != x.ExpectedResult);
            var error = errorsCount * 1.0 / trainingSet.Length;

            return new ClassificationAlgorithmBuildResult
            {
                ClassificationAlgorithm = classificationAlgorithm,
                Error = error
            };
        }
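        // Naive Bayes training: estimates class priors and per-feature value frequencies
        // (overall and per class) from the training set. Counts are turned into probabilities
        // by the Normalize helper (not shown; presumably it also applies smoothing).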
        public ClassificationAlgorithmBuildResult Build(TrainingExample[] trainingSet, ClassificationAlgorithmParams classificationAlgorithmParams)
        {
            totalExamples = trainingSet.Length;
            outputClassesCount = trainingSet.Max(x => x.ExpectedResult) + 1;
            featuresCount = trainingSet[0].Features.Length;
            featuresClassesCount = trainingSet.SelectMany(x => x.Features.Values).Max() + 1;

            classProb = new double[outputClassesCount];
            classCounts = new int[outputClassesCount];
            featureWithValueProb = new double[featuresCount, featuresClassesCount];
            featureWithValueWithinClassProb = new double[featuresCount, featuresClassesCount, outputClassesCount];

            foreach (var trainingExample in trainingSet)
            {
                classCounts[trainingExample.ExpectedResult]++;
            }

            foreach (var trainingExample in trainingSet)
            {
                var features = trainingExample.Features;
                var classNumber = trainingExample.ExpectedResult;

                for (var featureNumber = 0; featureNumber < featuresCount; featureNumber++)
                {
                    featureWithValueProb[featureNumber, features[featureNumber]]++;
                    featureWithValueWithinClassProb[featureNumber, features[featureNumber], classNumber]++;
                }
            }

            for (var classNumber = 0; classNumber < outputClassesCount; classNumber++)
            {
                classProb[classNumber] = classCounts[classNumber] * 1.0 / trainingSet.Length;
            }

            for (var featureNumber = 0; featureNumber < featuresCount; featureNumber++)
            {
                for (var featureValue = 0; featureValue < featuresClassesCount; featureValue++)
                {
                    featureWithValueProb[featureNumber, featureValue] = Normalize(featureWithValueProb[featureNumber, featureValue], totalExamples);

                    for (var classNumber = 0; classNumber < outputClassesCount; classNumber++)
                    {
                        featureWithValueWithinClassProb[featureNumber, featureValue, classNumber] =
                            Normalize(featureWithValueWithinClassProb[featureNumber, featureValue, classNumber], classCounts[classNumber]);
                    }
                }
            }

            var result = new NaiveBayesAlgorithm(classProb, featureWithValueProb, featureWithValueWithinClassProb, classCounts, totalExamples,
                outputClassesCount, featuresCount, featuresClassesCount);

            return ClassificationAlgorithmBuildResult.Create(result, trainingSet);
        }
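        // Multiclass SVM with a linear kernel, trained pairwise (one-vs-one) via
        // sequential minimal optimization using the Accord.NET learning API.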
        public ClassificationAlgorithmBuildResult Build(TrainingExample[] trainingSet, ClassificationAlgorithmParams classificationAlgorithmParams)
        {
            var featuresDimensionality = trainingSet[0].Features.Length;
            var outputClassesCount = trainingSet.Max(x => x.ExpectedResult) + 1;

            var inputs = trainingSet.Select(example => example.Features.ToDoubleArray()).ToArray();
            var outputs = trainingSet.Select(example => example.ExpectedResult).ToArray();

            var classifier = new MulticlassSupportVectorMachine(featuresDimensionality, new Linear(), outputClassesCount);
            var teacher = new MulticlassSupportVectorLearning(classifier, inputs, outputs)
                {
                    Algorithm = (svm, classInputs, classOutputs, i, j) =>
                                new SequentialMinimalOptimization(svm, classInputs, classOutputs)
                };

            teacher.Run();
            var result = new SupportVectorMachineAlgorithm(classifier);

            return ClassificationAlgorithmBuildResult.Create(result, trainingSet);
        }
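        // Splits a binary-labelled training set (classes 0 and 1) into majority and minority
        // subsets, swapping them if class 1 turns out to be the larger one, and counts how
        // often each binary feature equals 1 within each subset.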
        public TrainingSet(TrainingExample[] trainingSet)
        {
            Total = trainingSet.Length;
            Minority = trainingSet.Where(x => x.ExpectedResult == 1).ToArray();
            Majority = trainingSet.Where(x => x.ExpectedResult == 0).ToArray();
            if (Minority.Length > Majority.Length)
            {
                TrainingExample[] buf = Minority;
                Minority = Majority;
                Majority = buf;
            }

            MajorityFeatureFreq = new int[trainingSet[0].Features.Length];
            MinorityFeatureFreq = new int[trainingSet[0].Features.Length];
            for (var featureNumber = 0; featureNumber < trainingSet[0].Features.Length; featureNumber++)
            {
                MajorityFeatureFreq[featureNumber] = Majority.Count(x => x.Features[featureNumber] == 1);
                MinorityFeatureFreq[featureNumber] = Minority.Count(x => x.Features[featureNumber] == 1);
            }
        }
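        // Dispatches to the builder registered for the requested algorithm type.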
        public ClassificationAlgorithmBuildResult Build(TrainingExample[] trainingSet, ClassificationAlgorithmParams classificationAlgorithmParams)
        {
            var builder = builders.First(x => x.Type == classificationAlgorithmParams.Type);
            return builder.Build(trainingSet, classificationAlgorithmParams);
        }
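        // Internal decision-tree node: keeps the splitting feature and its child nodes
        // (presumably keyed by feature value).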
        public NonTerminalNode(TrainingExample[] trainingExamples, int splittingFeature, Dictionary<int, DecisionTreeNode> childs)
        {
            TrainingExamples = trainingExamples;
            SplittingFeature = splittingFeature;
            Childs = childs;
        }