/// <summary>
/// Creates a fully synthetic example: every feature value is produced by
/// <c>GenerateFeature</c> from the matching entry of
/// <c>trainingSet.MinorityFeatureFreq</c> and <c>trainingSet.Total</c>.
/// </summary>
/// <param name="trainingSet">Training set supplying the per-feature frequencies and the total.</param>
/// <returns>A new example with the generated features and <c>ExpectedResult</c> set to 1.</returns>
private TrainingExample OverSampleGeneratively(TrainingSet trainingSet)
{
    var generatedValues = trainingSet.MinorityFeatureFreq
        .Select(frequency => GenerateFeature(frequency, trainingSet.Total))
        .ToArray();

    // NOTE(review): label 1 presumably marks the minority class - confirm against TrainingSet.
    return new TrainingExample
    {
        Features = new FeatureVector(generatedValues),
        ExpectedResult = 1
    };
}
/// <summary>
/// Appends extra minority-class examples to <paramref name="trainingExamples"/> and
/// returns the combined set in shuffled order.
/// </summary>
/// <param name="trainingExamples">The examples to balance.</param>
/// <param name="samplingParams">
/// Decides whether sampling runs at all (<c>NeedSampling</c>), which strategy produces
/// the extra examples (<c>Strategy</c>) and the per-minority-example cap on additions
/// (<c>MinorityClassMaxOversampling</c>).
/// </param>
/// <returns>
/// The input array itself when <c>NeedSampling</c> is false; otherwise a new shuffled array
/// holding the original examples plus the additional samples.
/// </returns>
public TrainingExample[] OverSample(TrainingExample[] trainingExamples, SamplingParams samplingParams)
{
    if (!samplingParams.NeedSampling)
    {
        return trainingExamples;
    }

    var trainingSet = new TrainingSet(trainingExamples);
    var minorityCount = trainingSet.Minority.Length;

    // Add enough samples to close the gap to the majority class, but never more than
    // the configured multiple of the minority class size.
    var classGap = trainingSet.Majority.Length - minorityCount;
    var oversamplingCap = minorityCount * samplingParams.MinorityClassMaxOversampling;

    // Enumerable.Range rejects a negative count, so clamp to zero: when the "minority"
    // class is not actually smaller (or the cap is negative) nothing is added and the
    // original examples are simply shuffled, like the delta == 0 case.
    var delta = Math.Max(0, Math.Min(classGap, oversamplingCap));

    var additionalSamples = Enumerable.Range(0, delta)
        .Select(_ => OverSample(trainingSet, samplingParams))
        .ToArray();

    return trainingExamples.Concat(additionalSamples).RandomShuffle().ToArray();
}
/// <summary>
/// Produces one additional example using the strategy selected in
/// <paramref name="samplingParams"/>.
/// </summary>
/// <param name="trainingSet">Training set handed to the chosen strategy.</param>
/// <param name="samplingParams">Supplies the <c>Strategy</c> to dispatch on.</param>
/// <returns>The example produced by the matching strategy method.</returns>
/// <exception cref="InvalidOperationException">
/// The strategy is not Random, Generative or Perturbed.
/// </exception>
private TrainingExample OverSample(TrainingSet trainingSet, SamplingParams samplingParams)
{
    var strategy = samplingParams.Strategy;

    if (strategy == SamplingStrategy.Random)
    {
        return OverSampleRandomly(trainingSet);
    }

    if (strategy == SamplingStrategy.Generative)
    {
        return OverSampleGeneratively(trainingSet);
    }

    if (strategy == SamplingStrategy.Perturbed)
    {
        return OverSamplePerturbed(trainingSet);
    }

    throw new InvalidOperationException(string.Format("No implementation for strategy {0}", strategy));
}
/// <summary>
/// Creates a new example by perturbing a randomly chosen minority example. For each
/// feature a number is drawn with <c>randomProvider.GetInt(0, 100)</c>: below 70 the
/// chosen example's value is kept, below 80 a value from <c>GenerateFeature</c> is used,
/// otherwise the two are added and divided by 2.
/// </summary>
/// <param name="trainingSet">
/// Training set supplying the minority examples, the per-feature frequencies and the total.
/// </param>
/// <returns>A new example with the perturbed features and <c>ExpectedResult</c> set to 1.</returns>
private TrainingExample OverSamplePerturbed(TrainingSet trainingSet)
{
    // NOTE(review): if GetInt(0, 100) is uniform over [0, 100) these thresholds mean
    // roughly 70% keep / 10% regenerate / 20% blend - confirm GetInt's upper-bound semantics.
    const int keepBelow = 70;
    const int regenerateBelow = 80;

    int total = trainingSet.Total;

    // Pick the minority example whose feature values serve as the starting point.
    var minority = trainingSet.Minority;
    var baseValues = minority[randomProvider.GetInt(0, minority.Length)].Features.Values;

    var perturbedValues = trainingSet.MinorityFeatureFreq
        .Select((freq, position) =>
        {
            var roll = randomProvider.GetInt(0, 100);

            if (roll < keepBelow)
            {
                return baseValues[position];
            }

            if (roll < regenerateBelow)
            {
                return GenerateFeature(freq, total);
            }

            // Blend the existing value with a freshly generated one.
            return (baseValues[position] + GenerateFeature(freq, total)) / 2;
        })
        .ToArray();

    return new TrainingExample
    {
        Features = new FeatureVector(perturbedValues),
        ExpectedResult = 1
    };
}
/// <summary>
/// Picks one of the existing minority examples at an index drawn from
/// <c>randomProvider.GetInt(0, Minority.Length)</c>.
/// </summary>
/// <param name="trainingSet">Training set whose <c>Minority</c> array is sampled.</param>
/// <returns>The selected minority example itself (the existing instance, not a copy).</returns>
private TrainingExample OverSampleRandomly(TrainingSet trainingSet)
{
    var minority = trainingSet.Minority;
    return minority[randomProvider.GetInt(0, minority.Length)];
}