Esempio n. 1
0
        /// <summary>
        /// Draws a batch of negative samples from <paramref name="allNegSamples"/> and keeps only
        /// those that the current classifier still predicts as positive (its false positives).
        /// </summary>
        /// <param name="allNegSamples">Pool the negative samples are drawn from.</param>
        /// <param name="falsePositiveRate">
        /// Current false-positive rate; the number of candidates drawn is
        /// <paramref name="maxSampleNum"/> divided by this rate.
        /// </param>
        /// <param name="maxSampleNum">Upper bound on the number of samples returned.</param>
        /// <returns>
        /// The drawn negatives that passed <c>GetPositivePredictedSamples</c>, trimmed to at most
        /// <paramref name="maxSampleNum"/> entries.
        /// </returns>
        private SampleCollection CreateNextSamples(SampleCollection allNegSamples, double falsePositiveRate, int maxSampleNum)
        {
            // Over-sample by 1 / falsePositiveRate so that about maxSampleNum candidates survive the filter.
            double requestedCount = maxSampleNum / falsePositiveRate;

            int selectSampleCount;
            if (double.IsNaN(requestedCount) || requestedCount < 1 || requestedCount >= 2147483648.0)
            {
                // Zero, negative, NaN, infinite or larger than int.MaxValue: casting such a double
                // to int yields an unspecified value, so decide explicitly and fall back to the
                // size of the whole pool.
                selectSampleCount = allNegSamples.Count;
            }
            else
            {
                selectSampleCount = (int)requestedCount;
            }

            SampleCollection negSamples = allNegSamples.GetNegSamples(selectSampleCount);
            int oldNum = negSamples.Count;

            // Keep only the negatives that are (wrongly) predicted as positive.
            negSamples = this.GetPositivePredictedSamples(negSamples);
            if (DebugMsg.Debug)
            {
                string msg = string.Format("选取了{0}个负样本,通过了{1}个,误检率为{2:P5}\r\n",
                                           oldNum, negSamples.Count, (MyFloat)negSamples.Count / oldNum);
                DebugMsg.AddMessage(msg, 0);
            }

            if (negSamples.Count > maxSampleNum)
            {
                negSamples.TrimExcess(maxSampleNum);
            }

            // Explicit collection kept from the original; presumably meant to release the rejected
            // samples before the next memory-heavy training stage.
            GC.Collect();
            return negSamples;
        }
Esempio n. 2
0
        /// <summary>
        /// Trains the cascade one stage at a time until the accumulated false-positive rate is no
        /// longer above <paramref name="targetFalsePositiveRate"/>, or until the negative or
        /// positive sample set is empty.
        /// </summary>
        /// <remarks>
        /// If classifiers are already loaded, their performance is evaluated on
        /// <paramref name="validateSamples"/> and training continues from them. The cascade is saved
        /// to <c>D:\ccc{stage}.xml</c> after every stage and to <c>D:\ccc.xml</c> at the end.
        /// </remarks>
        /// <param name="posSamples">Positive training samples.</param>
        /// <param name="allNegSamples">Pool from which each stage's negative samples are drawn.</param>
        /// <param name="validateSamples">
        /// Samples passed to each stage's training and used to evaluate an already loaded cascade;
        /// after each stage they are narrowed to those still predicted positive.
        /// </param>
        /// <param name="size">Stored in <c>_size</c>.</param>
        /// <param name="targetFalsePositiveRate">Accumulated false-positive rate at which training stops.</param>
        /// <param name="maxFalsePositiveRate">Per-stage false-positive limit handed to each stage's training.</param>
        /// <param name="minHitRate">Per-stage hit-rate requirement handed to each stage's training.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="posSamples"/> or <paramref name="allNegSamples"/> is null.
        /// </exception>
        public void Train(SampleCollection posSamples,
                          SampleCollection allNegSamples,
                          SampleCollection validateSamples,
                          Size size,
                          double targetFalsePositiveRate, double maxFalsePositiveRate, double minHitRate)
        {
            // Fail fast, before any state is touched, instead of hitting a NullReferenceException later.
            if (posSamples == null)
            {
                throw new System.ArgumentNullException("posSamples");
            }
            if (allNegSamples == null)
            {
                throw new System.ArgumentNullException("allNegSamples");
            }

            const int startCapacity = 20;
            const string stageSavePathFormat = @"D:\ccc{0}.xml";
            const string finalSavePath = @"D:\ccc.xml";

            _size = size;
            List<StageClassifier> stages = new List<StageClassifier>(startCapacity);
            double hitRate = 1;
            double falsePositiveRate = 1;
            int stageCount = 0;

            int maxSampleNum = ComputeMaxNegSampleCount(posSamples.Count);
            SampleCollection negSamples;

            // If a cascade is already loaded, measure its current performance and resume from it.
            if (_classifiers != null && _classifiers.Length > 0)
            {
                PredictResult loadedResult = EvaluateErrorRate(validateSamples);
                hitRate = loadedResult.HitRate;
                falsePositiveRate = loadedResult.FalsePositiveRate;
                stageCount = _classifiers.Length;
                stages.AddRange(_classifiers);

                negSamples = CreateNextSamples(allNegSamples, falsePositiveRate, maxSampleNum);

                if (DebugMsg.Debug)
                {
                    string msg = string.Format("载入分类器完成,层数:{0},当前性能为:\r\n检测率:\t{1:P5},\t误检率:\t{2:P5}\r\n------\r\n",
                                               stageCount, hitRate, falsePositiveRate);
                    DebugMsg.AddMessage(msg, 0);
                }
            }
            else
            {
                negSamples = allNegSamples.GetNegSamples(maxSampleNum);
            }

            while (falsePositiveRate > targetFalsePositiveRate && negSamples.Count != 0 && posSamples.Count != 0)
            {
                stageCount++;

                if (DebugMsg.Debug)
                {
                    string msg = string.Format("--------------------\r\n开始训练第{0}级分类器,使用的数量为:\r\n正样本:\t{1}\t负样本:\t{2}\r\n目标检测率:\t{3:P5}\t目标误检率:\t{4:P5}\r\n",
                                               stageCount, posSamples.Count, negSamples.Count, minHitRate, maxFalsePositiveRate);
                    DebugMsg.AddMessage(msg, 0);
                }

                StageClassifier currentStage = new StageClassifier(stageCount);
                PredictResult stageResult = currentStage.Train(posSamples, negSamples, validateSamples,
                                                               maxFalsePositiveRate, minHitRate);

                // Cascade rates are the product of the per-stage rates.
                falsePositiveRate *= stageResult.FalsePositiveRate;
                hitRate *= stageResult.HitRate;

                // Publish the new stage before filtering samples: the sample filters below run
                // against the cascade stored in _classifiers.
                stages.Add(currentStage);
                _classifiers = stages.ToArray();

                if (DebugMsg.Debug)
                {
                    string msg = string.Format("------\r\n第{0}级分类器训练结束,当前性能为:\r\n检测率:\t{1:P5},\t误检率:\t{2:P5}\r\n弱分类器数量:{3}\r\n目前训练时间总计:{4}\r\n",
                                               stageCount, hitRate, falsePositiveRate, currentStage.WeakClassifierCount, DebugMsg.stopwatch.Elapsed.ToString());
                    DebugMsg.AddMessage(msg, 0);
                }

                if (falsePositiveRate > targetFalsePositiveRate)
                {
                    // Another stage is needed: keep only validation samples that still pass the
                    // cascade and draw a fresh set of negatives for the next stage.
                    validateSamples = this.GetPositivePredictedSamples(validateSamples);
                    negSamples = CreateNextSamples(allNegSamples, falsePositiveRate, maxSampleNum);
                }

                // Checkpoint after every stage.
                this.Save(string.Format(stageSavePathFormat, stageCount));
            }

            if (DebugMsg.Debug)
            {
                DebugMsg.stopwatch.Stop();
                string msg = string.Format("\r\n--------------------\r\n全部训练完成,训练时间总计:{0}\r\n",
                                           DebugMsg.stopwatch.Elapsed.ToString());
                DebugMsg.AddMessage(msg, 0);
            }

            this.Save(finalSavePath);
        }

        /// <summary>
        /// Computes how many negative samples to use in a single training round, based on the
        /// free physical memory, the number of weak classifiers and the number of positive samples.
        /// </summary>
        /// <param name="posSampleCount">Number of positive samples, subtracted from the sample budget.</param>
        /// <returns>The negative-sample budget; not clamped, so it can be zero or negative.</returns>
        private static int ComputeMaxNegSampleCount(int posSampleCount)
        {
            const double freeMemoryMargin = 400e6;  // subtracted from the reported free physical memory
            const double fallbackMemoryUse = 1.8e9; // used when less than the margin is free
            // NOTE(review): the original comment says 150M is reserved for the rest of the program,
            // but 1.5e6 is about 1.5 MB if the unit is bytes - confirm the intended value.
            const double programReserve = 1.5e6;

            // Size the negative set from available memory so that memory is used fully
            // without spilling into virtual memory.
            double maxMemoryUse = MemoryInfo.GetFreePhysicalMemory() - freeMemoryMargin;
            if (maxMemoryUse < 0)
            {
                maxMemoryUse = fallbackMemoryUse;
            }

            int weakNum = WeakClassifierManager.Instance.WeakClassifiers.Length;

            // Budget / weakNum / sizeof(MyFloat): presumably one MyFloat is held per sample
            // per weak classifier.
            int maxSampleNum = (int)((maxMemoryUse - programReserve) / weakNum / sizeof(MyFloat)) - posSampleCount;

            if (DebugMsg.Debug)
            {
                string msg = string.Format("\r\n单次使用负样本:{0},特征数量:{1},预计消耗内存:{2}M\r\n",
                                           maxSampleNum, weakNum, maxMemoryUse / 1024 / 1024);
                DebugMsg.AddMessage(msg, 0);
            }

            return maxSampleNum;
        }