/// <summary>
/// Rebalances a training set by generating additional minority-class samples.
/// Returns the input unchanged when sampling is disabled; otherwise appends
/// synthetic examples and shuffles the combined set.
/// </summary>
/// <param name="trainingExamples">The original, possibly imbalanced training set.</param>
/// <param name="samplingParams">Controls whether sampling runs and how far the minority class may be inflated.</param>
/// <returns>A shuffled training set containing the originals plus synthetic minority samples.</returns>
public TrainingExample[] OverSample(TrainingExample[] trainingExamples, SamplingParams samplingParams)
{
    if (!samplingParams.NeedSampling)
    {
        return trainingExamples;
    }

    var trainingSet = new TrainingSet(trainingExamples);

    // Generate at most enough samples to close the class-size gap, capped by
    // the configured oversampling multiplier on the minority class.
    var classGap = trainingSet.Majority.Length - trainingSet.Minority.Length;
    var oversamplingCap = trainingSet.Minority.Length * samplingParams.MinorityClassMaxOversampling;
    var samplesToGenerate = Math.Min(classGap, oversamplingCap);

    var syntheticExamples = Enumerable
        .Range(0, samplesToGenerate)
        .Select(_ => OverSample(trainingSet, samplingParams))
        .ToArray();

    return trainingExamples.Concat(syntheticExamples).RandomShuffle().ToArray();
}
/// <summary>
/// Packages a trained algorithm together with its empirical error rate:
/// the fraction of training examples the algorithm misclassifies.
/// </summary>
/// <param name="classificationAlgorithm">The already-trained classifier to evaluate.</param>
/// <param name="trainingSet">Examples to score the classifier against.</param>
/// <returns>A build result holding the classifier and its training-set error in [0, 1].</returns>
public static ClassificationAlgorithmBuildResult Create(IClassificationAlgorithm classificationAlgorithm, TrainingExample[] trainingSet)
{
    var misclassified = 0;
    foreach (var example in trainingSet)
    {
        if (classificationAlgorithm.Classify(example.Features).PredictedClass != example.ExpectedResult)
        {
            misclassified++;
        }
    }

    return new ClassificationAlgorithmBuildResult
    {
        ClassificationAlgorithm = classificationAlgorithm,
        Error = (double)misclassified / trainingSet.Length
    };
}
/// <summary>
/// Trains a naive Bayes model: estimates class priors, per-feature-value
/// frequencies, and per-class conditional feature frequencies from the
/// training set, then wraps the model with its training error.
/// </summary>
/// <param name="trainingSet">Non-empty set of examples with integer class labels and integer feature values.</param>
/// <param name="classificationAlgorithmParams">Unused by this builder; part of the common builder signature.</param>
/// <returns>The trained algorithm together with its error on <paramref name="trainingSet"/>.</returns>
public ClassificationAlgorithmBuildResult Build(TrainingExample[] trainingSet, ClassificationAlgorithmParams classificationAlgorithmParams)
{
    totalExamples = trainingSet.Length;
    // Labels and feature values are assumed dense in [0, max]; sizes come from the observed maxima.
    outputClassesCount = trainingSet.Max(x => x.ExpectedResult) + 1;
    featuresCount = trainingSet[0].Features.Length;
    featuresClassesCount = trainingSet.SelectMany(x => x.Features.Values).Max() + 1;

    classProb = new double[outputClassesCount];
    classCounts = new int[outputClassesCount];
    featureWithValueProb = new double[featuresCount, featuresClassesCount];
    featureWithValueWithinClassProb = new double[featuresCount, featuresClassesCount, outputClassesCount];

    // Single pass: accumulate class frequencies and raw feature-value counts.
    foreach (var example in trainingSet)
    {
        var classNumber = example.ExpectedResult;
        classCounts[classNumber]++;
        for (var featureNumber = 0; featureNumber < featuresCount; featureNumber++)
        {
            var featureValue = example.Features[featureNumber];
            featureWithValueProb[featureNumber, featureValue]++;
            featureWithValueWithinClassProb[featureNumber, featureValue, classNumber]++;
        }
    }

    // Class priors: P(class) = count / total.
    for (var classNumber = 0; classNumber < outputClassesCount; classNumber++)
    {
        classProb[classNumber] = (double)classCounts[classNumber] / trainingSet.Length;
    }

    // Convert raw counts into probabilities via Normalize (defined elsewhere in this class;
    // presumably applies smoothing — confirm against its implementation).
    for (var featureNumber = 0; featureNumber < featuresCount; featureNumber++)
    {
        for (var featureValue = 0; featureValue < featuresClassesCount; featureValue++)
        {
            featureWithValueProb[featureNumber, featureValue] =
                Normalize(featureWithValueProb[featureNumber, featureValue], totalExamples);

            for (var classNumber = 0; classNumber < outputClassesCount; classNumber++)
            {
                featureWithValueWithinClassProb[featureNumber, featureValue, classNumber] =
                    Normalize(
                        featureWithValueWithinClassProb[featureNumber, featureValue, classNumber],
                        classCounts[classNumber]);
            }
        }
    }

    var result = new NaiveBayesAlgorithm(
        classProb,
        featureWithValueProb,
        featureWithValueWithinClassProb,
        classCounts,
        totalExamples,
        outputClassesCount,
        featuresCount,
        featuresClassesCount);

    return ClassificationAlgorithmBuildResult.Create(result, trainingSet);
}
/// <summary>
/// Trains a multiclass SVM (one SMO-trained sub-machine per class pair, linear
/// kernel) on the training set and wraps it with its training error.
/// </summary>
/// <param name="trainingSet">Non-empty set of examples with integer class labels.</param>
/// <param name="classificationAlgorithmParams">Unused by this builder; part of the common builder signature.</param>
/// <returns>The trained algorithm together with its error on <paramref name="trainingSet"/>.</returns>
public ClassificationAlgorithmBuildResult Build(TrainingExample[] trainingSet, ClassificationAlgorithmParams classificationAlgorithmParams)
{
    var featuresDimensionality = trainingSet[0].Features.Length;
    var outputClassesCount = trainingSet.Max(example => example.ExpectedResult) + 1;

    // Project the examples into the parallel numeric arrays the learner expects.
    var inputs = new double[trainingSet.Length][];
    var outputs = new int[trainingSet.Length];
    for (var i = 0; i < trainingSet.Length; i++)
    {
        inputs[i] = trainingSet[i].Features.ToDoubleArray();
        outputs[i] = trainingSet[i].ExpectedResult;
    }

    var classifier = new MulticlassSupportVectorMachine(featuresDimensionality, new Linear(), outputClassesCount);
    var teacher = new MulticlassSupportVectorLearning(classifier, inputs, outputs)
    {
        // Each class-pair sub-machine is trained with sequential minimal optimization.
        Algorithm = (svm, classInputs, classOutputs, i, j) =>
            new SequentialMinimalOptimization(svm, classInputs, classOutputs)
    };
    teacher.Run();

    var result = new SupportVectorMachineAlgorithm(classifier);
    return ClassificationAlgorithmBuildResult.Create(result, trainingSet);
}
/// <summary>
/// Partitions a binary training set into majority/minority halves and
/// precomputes, per feature, how many examples in each half have that
/// feature set to 1.
/// </summary>
/// <param name="trainingSet">Non-empty set of examples with 0/1 expected results and 0/1 feature values.</param>
public TrainingSet(TrainingExample[] trainingSet)
{
    Total = trainingSet.Length;

    // NOTE(review): examples with ExpectedResult outside {0, 1} land in neither
    // partition — confirm callers only pass strictly binary-labeled data.
    Minority = trainingSet.Where(x => x.ExpectedResult == 1).ToArray();
    Majority = trainingSet.Where(x => x.ExpectedResult == 0).ToArray();

    // Guarantee the naming invariant: Majority is always the larger (or equal) half.
    if (Minority.Length > Majority.Length)
    {
        var swapped = Minority;
        Minority = Majority;
        Majority = swapped;
    }

    var featureCount = trainingSet[0].Features.Length;
    MajorityFeatureFreq = new int[featureCount];
    MinorityFeatureFreq = new int[featureCount];
    for (var featureNumber = 0; featureNumber < featureCount; featureNumber++)
    {
        MajorityFeatureFreq[featureNumber] = Majority.Count(x => x.Features[featureNumber] == 1);
        MinorityFeatureFreq[featureNumber] = Minority.Count(x => x.Features[featureNumber] == 1);
    }
}
/// <summary>
/// Dispatches the build to the builder registered for the requested algorithm type.
/// </summary>
/// <param name="trainingSet">Examples to train on; forwarded to the selected builder.</param>
/// <param name="classificationAlgorithmParams">Selects the builder via its Type and carries builder-specific settings.</param>
/// <returns>The build result produced by the matching builder.</returns>
/// <exception cref="InvalidOperationException">No builder is registered for the requested type.</exception>
public ClassificationAlgorithmBuildResult Build(TrainingExample[] trainingSet, ClassificationAlgorithmParams classificationAlgorithmParams)
{
    var builder = builders.FirstOrDefault(x => x.Type == classificationAlgorithmParams.Type);
    if (builder == null)
    {
        // First() would also throw InvalidOperationException here, but with the
        // unhelpful "Sequence contains no matching element" message.
        throw new InvalidOperationException(
            $"No classification algorithm builder registered for type '{classificationAlgorithmParams.Type}'.");
    }

    return builder.Build(trainingSet, classificationAlgorithmParams);
}
/// <summary>
/// Inner decision-tree node: remembers the examples it was built from, the
/// feature it splits on, and a child subtree per feature value.
/// </summary>
/// <param name="trainingExamples">Examples that reached this node during tree construction.</param>
/// <param name="splittingFeature">Index of the feature this node splits on.</param>
/// <param name="childs">Child subtree for each value of the splitting feature.</param>
public NonTerminalNode(TrainingExample[] trainingExamples, int splittingFeature, Dictionary<int, DecisionTreeNode> childs)
{
    Childs = childs;
    SplittingFeature = splittingFeature;
    TrainingExamples = trainingExamples;
}