/// <summary>
/// Draws a new batch of negative samples from <paramref name="allNegSamples"/>, filters it
/// through <c>GetPositivePredictedSamples</c>, and caps the result at
/// <paramref name="maxSampleNum"/> entries.
/// </summary>
/// <param name="allNegSamples">Source collection the candidate negatives are requested from.</param>
/// <param name="falsePositiveRate">
/// Current false-positive rate. The number of candidates requested is
/// <c>maxSampleNum / falsePositiveRate</c>, so that roughly <paramref name="maxSampleNum"/>
/// of them are expected to survive the filter.
/// </param>
/// <param name="maxSampleNum">Upper bound on the number of samples returned.</param>
/// <returns>The filtered collection, trimmed to at most <paramref name="maxSampleNum"/> samples.</returns>
private SampleCollection CreateNextSamples(SampleCollection allNegSamples, double falsePositiveRate, int maxSampleNum)
{
    // Validate the quotient while it is still a double. Casting NaN, infinity or a value
    // beyond int range to int is unspecified in an unchecked context and differs between
    // runtimes (wraps to int.MinValue on legacy x86/x64, saturates on ARM and .NET 9+),
    // so the fallback below must not depend on what the cast happens to produce.
    double requestedCount = maxSampleNum / falsePositiveRate;
    int selectSampleCount;
    if (requestedCount >= 1.0 && requestedCount < 2147483648.0)
    {
        selectSampleCount = (int)requestedCount;
    }
    else
    {
        // Non-positive, fractional (< 1), NaN or out-of-range request: use the source's own count.
        selectSampleCount = allNegSamples.Count;
    }

    SampleCollection negSamples = allNegSamples.GetNegSamples(selectSampleCount);
    int oldNum = negSamples.Count;

    // Keep only the candidates returned by the filter; the debug message below reports
    // them as the ones that "passed" and derives the false-positive rate from them.
    negSamples = this.GetPositivePredictedSamples(negSamples);

    if (DebugMsg.Debug)
    {
        string msg = string.Format("选取了{0}个负样本,通过了{1}个,误检率为{2:P5}\r\n", oldNum, negSamples.Count, (MyFloat)negSamples.Count / oldNum);
        DebugMsg.AddMessage(msg, 0);
    }

    if (negSamples.Count > maxSampleNum)
    {
        negSamples.TrimExcess(maxSampleNum);
    }

    // Explicit collection kept from the original: the discarded candidate batch can be large
    // and the caller sizes its working set from the free physical memory.
    GC.Collect();
    return negSamples;
}
/// <summary>
/// Trains stage classifiers one after another until the accumulated false-positive rate is no
/// longer above <paramref name="targetFalsePositiveRate"/>, or until either the negative or the
/// positive sample set is empty. If <c>_classifiers</c> already holds stages, training resumes
/// from them. The classifier is saved after every stage and once more at the end.
/// </summary>
/// <param name="posSamples">Positive samples, passed unchanged to every stage.</param>
/// <param name="allNegSamples">Source from which each stage's negative samples are drawn.</param>
/// <param name="validateSamples">
/// Validation samples; after each stage that has not yet reached the target they are replaced
/// by the result of <c>GetPositivePredictedSamples</c>.
/// </param>
/// <param name="size">Value stored in <c>_size</c>.</param>
/// <param name="targetFalsePositiveRate">Accumulated false-positive rate at which training stops.</param>
/// <param name="maxFalsePositiveRate">Per-stage false-positive limit forwarded to <c>StageClassifier.Train</c>.</param>
/// <param name="minHitRate">Per-stage hit-rate requirement forwarded to <c>StageClassifier.Train</c>.</param>
public void Train(SampleCollection posSamples, SampleCollection allNegSamples, SampleCollection validateSamples, Size size, double targetFalsePositiveRate, double maxFalsePositiveRate, double minHitRate)
{
    const int initialStageCapacity = 20;
    // NOTE(review): the original comment said "reserve 150M for the rest of the program",
    // but this constant is 1.5e6 (about 1.5 MB) - confirm which one is intended.
    const float reservedBytes = 1.5e6f;

    _size = size;
    List<StageClassifier> stages = new List<StageClassifier>(initialStageCapacity);
    double cumulativeHitRate = 1;
    double cumulativeFalsePositiveRate = 1;
    int stageIndex = 0;

    // Work out how many negative samples to use per stage. The count is derived from the
    // free physical memory so that memory is fully used without spilling into virtual memory.
    double memoryBudget = MemoryInfo.GetFreePhysicalMemory() - 400e6;
    if (memoryBudget < 0)
    {
        memoryBudget = 1.8e9;
    }

    int featureCount = WeakClassifierManager.Instance.WeakClassifiers.Length;
    int maxSampleNum = (int)((memoryBudget - reservedBytes) / featureCount / sizeof(MyFloat)) - posSamples.Count;

    if (DebugMsg.Debug)
    {
        DebugMsg.AddMessage(
            string.Format("\r\n单次使用负样本:{0},特征数量:{1},预计消耗内存:{2}M\r\n", maxSampleNum, featureCount, memoryBudget / 1024 / 1024),
            0);
    }

    SampleCollection negSamples;
    bool resumeFromExisting = _classifiers != null && _classifiers.Length > 0;
    if (!resumeFromExisting)
    {
        negSamples = allNegSamples.GetNegSamples(maxSampleNum);
    }
    else
    {
        // Classifiers already exist: measure their current performance and continue from there.
        PredictResult loaded = EvaluateErrorRate(validateSamples);
        cumulativeHitRate = loaded.HitRate;
        cumulativeFalsePositiveRate = loaded.FalsePositiveRate;
        stageIndex = _classifiers.Length;
        stages.AddRange(_classifiers);
        negSamples = CreateNextSamples(allNegSamples, cumulativeFalsePositiveRate, maxSampleNum);

        if (DebugMsg.Debug)
        {
            DebugMsg.AddMessage(
                string.Format("载入分类器完成,层数:{0},当前性能为:\r\n检测率:\t{1:P5},\t误检率:\t{2:P5}\r\n------\r\n", stageIndex, cumulativeHitRate, cumulativeFalsePositiveRate),
                0);
        }
    }

    while (cumulativeFalsePositiveRate > targetFalsePositiveRate && negSamples.Count != 0 && posSamples.Count != 0)
    {
        stageIndex++;

        if (DebugMsg.Debug)
        {
            DebugMsg.AddMessage(
                string.Format("--------------------\r\n开始训练第{0}级分类器,使用的数量为:\r\n正样本:\t{1}\t负样本:\t{2}\r\n目标检测率:\t{3:P5}\t目标误检率:\t{4:P5}\r\n", stageIndex, posSamples.Count, negSamples.Count, minHitRate, maxFalsePositiveRate),
                0);
        }

        StageClassifier stage = new StageClassifier(stageIndex);
        PredictResult stageResult = stage.Train(posSamples, negSamples, validateSamples, maxFalsePositiveRate, minHitRate);

        // Cascade performance is tracked as the product of the per-stage rates.
        cumulativeFalsePositiveRate *= stageResult.FalsePositiveRate;
        cumulativeHitRate *= stageResult.HitRate;

        stages.Add(stage);
        // Publish the new stage before filtering samples for the next round.
        _classifiers = stages.ToArray();

        if (DebugMsg.Debug)
        {
            DebugMsg.AddMessage(
                string.Format("------\r\n第{0}级分类器训练结束,当前性能为:\r\n检测率:\t{1:P5},\t误检率:\t{2:P5}\r\n弱分类器数量:{3}\r\n目前训练时间总计:{4}\r\n", stageIndex, cumulativeHitRate, cumulativeFalsePositiveRate, stage.WeakClassifierCount, DebugMsg.stopwatch.Elapsed.ToString()),
                0);
        }

        if (cumulativeFalsePositiveRate > targetFalsePositiveRate)
        {
            // Another stage is needed: refresh the validation and negative sets.
            // Positive samples are deliberately left as they are.
            validateSamples = this.GetPositivePredictedSamples(validateSamples);
            negSamples = CreateNextSamples(allNegSamples, cumulativeFalsePositiveRate, maxSampleNum);
        }

        // NOTE(review): checkpoint path is hard-coded to drive D: - confirm this is intended.
        string checkpointPath = string.Format(@"D:\ccc{0}.xml", stageIndex.ToString());
        this.Save(checkpointPath);
    }

    if (DebugMsg.Debug)
    {
        DebugMsg.stopwatch.Stop();
        DebugMsg.AddMessage(
            string.Format("\r\n--------------------\r\n全部训练完成,训练时间总计:{0}\r\n", DebugMsg.stopwatch.Elapsed.ToString()),
            0);
    }

    this.Save(@"D:\ccc.xml");
}