Example #1
 public void Reward(double value)
 {
     total += 1;
     if (value > 0)
     {
         wins += 1;
     }
     WinRate = wins / total;
     // Record the action that would have been correct: the opposite of the last
     // action if the reward was negative, otherwise the action actually taken.
     Historical[lastKey].Output = value < 0 ? (lastAction == 1 ? 0 : 1) : lastAction;
     RewardCollected           += value;
     dqnAgent.Learn(value);
 }
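The division in WinRate = wins / total only yields a fraction if the counters are floating-point; integer fields would truncate the result to 0. A minimal sketch of the state this method appears to rely on, with names taken from the snippet but types assumed from the arithmetic (they are not shown in the source):

 // Hedged sketch: these declarations are assumptions inferred from the method
 // above, not taken from the original source. wins and total are doubles so
 // that wins / total performs floating-point division, not integer truncation.
 private double wins;            // number of positive rewards seen so far
 private double total;           // number of rewards seen so far
 public  double WinRate;         // fraction of wins, in the range 0..1
 private int    lastAction;      // action taken on the previous step (assumed 0/1)
 private string lastKey;         // key of the Historical entry for that step
 public  double RewardCollected; // cumulative reward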
Example #2
 private void BLearning()
 {
     while (true)
     {
         if (Historical.Count < 20000)
         {
             // Not enough samples collected yet; wait and check again.
             Thread.Sleep(TimeSpan.FromMinutes(30));
             continue;
         }
         var correct = 0.0;
         var total   = 0.0;
         // Randomly sample a fresh set of hyperparameters for a candidate agent.
         var options = new AgentOptions
         {
             Gamma                 = Tembo.Random(0.01, 0.99),
             Epsilon               = Tembo.Random(0.01, 0.75),
             Alpha                 = Tembo.Random(0.01, 0.99),
             ExperinceAddEvery     = Tembo.RandomInt(1, 10000),
             ExperienceSize        = 0,
             LearningSteps         = Tembo.RandomInt(1, 10),
             HiddenUnits           = Tembo.RandomInt(100000, 100000000),
             ErrorClamp            = Tembo.Random(0.01, 1.0),
             AdaptiveLearningSteps = true
         };
         var agent = new DQN(dqnAgent.NumberOfStates, dqnAgent.NumberOfActions, options);
         // Replay the historical samples and score the candidate agent.
         for (var i = 0; i < Historical.Count; i++)
         {
             var spi    = Historical.ElementAt(i);
             var action = agent.Act(spi.Value.Values);
             if (action == spi.Value.Output)
             {
                 correct += 1;
                 agent.Learn(1);
             }
             else
             {
                 agent.Learn(-1);
             }
             total += 1;
         }
         // Keep the win rate on the same 0..1 scale as WinRate in Reward();
         // the "P" format specifier renders the fraction as a percentage.
         var winrate = correct / total;
         if (winrate > WinRate)
         {
             CN.Log($"NEW AGENT DISCOVERED --> WINRATE {winrate:P}, CLASS: {AgentName}", 2);
             Save();
             dqnAgent = agent;
             WinRate  = winrate;
         }
     }
 }
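Because BLearning loops forever, it has to run on its own thread. A minimal launch sketch, assuming nothing beyond System.Threading and the method above:

 // Hedged sketch: run the endless hyperparameter search in the background so it
 // never blocks the caller; a background thread dies with the process.
 var searchThread = new Thread(BLearning) { IsBackground = true };
 searchThread.Start();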
Example #3
 public void ClassificationByDQN()
 {
     double _loss = 1.0;
     //prepare the feature and label raster layers
     GRasterLayer featureLayer = new GRasterLayer(featureFullFilename);
     GRasterLayer labelLayer   = new GRasterLayer(trainFullFilename);
     //create environment for agent exploring
     IEnv env = new ImageClassifyEnv(featureLayer, labelLayer);
     //create DQN algorithm
     DQN dqn = new DQN(env);

     //to keep this quick, we set the number of training epochs to 10.
     //please do not use so few training steps in actual use.
     dqn.SetParameters(10, 0);
     //register event to get information while training
     dqn.OnLearningLossEventHandler += (double loss, double totalReward, double accuracy, double progress, string epochesTime) => { _loss = loss; };
     //start DQN learning
     dqn.Learn();
     //in general, the loss is less than 1
     Assert.IsTrue(_loss < 1.0);
     //apply the trained DQN to classify featureLayer
     IRasterLayerCursorTool pRasterLayerCursorTool = new GRasterLayerCursorTool();
     pRasterLayerCursorTool.Visit(featureLayer);
     //pick the value at pixel (50, 50) and classify it
     double[] state         = pRasterLayerCursorTool.PickNormalValue(50, 50);
     double[] action        = dqn.ChooseAction(state).action;
     int      landCoverType = dqn.ActionToRawValue(NP.Argmax(action));

     //do something as you need, e.g. draw landCoverType to a bitmap at position (i, j)
     //the classification result is not stable because the number of training epochs is too small.
     Assert.IsTrue(landCoverType >= 0);
 }
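The test keeps only the last loss value; to watch the whole training run, the same event can log every callback. A hedged sketch reusing the delegate signature shown in the test above:

 // Hedged sketch: log each training callback instead of storing only the final
 // loss. The parameter list mirrors the handler registered in the test above.
 dqn.OnLearningLossEventHandler += (double loss, double totalReward, double accuracy, double progress, string epochesTime) =>
 {
     Console.WriteLine($"loss={loss:F4} reward={totalReward:F2} accuracy={accuracy:P1} progress={progress:P0} time={epochesTime}");
 };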
Example #4
 /// <summary>
 /// DQN classify task
 /// </summary>
 /// <param name="featureRasterLayer">raster layer with the input features</param>
 /// <param name="labelRasterLayer">raster layer with the training labels</param>
 /// <param name="epochs">number of training epochs</param>
 public JobDQNClassify(GRasterLayer featureRasterLayer, GRasterLayer labelRasterLayer, int epochs = 3000)
 {
     _t = new Thread(() =>
     {
         ImageClassifyEnv env = new ImageClassifyEnv(featureRasterLayer, labelRasterLayer);
         _dqn = new DQN(env);
         _dqn.SetParameters(epochs: epochs, gamma: _gamma);
         _dqn.OnLearningLossEventHandler += _dqn_OnLearningLossEventHandler;
         //training
         Summary = "training model";
         _dqn.Learn();
         //classification
         Summary = "applying classification";
         IRasterLayerCursorTool pRasterLayerCursorTool = new GRasterLayerCursorTool();
         pRasterLayerCursorTool.Visit(featureRasterLayer);
         Bitmap classificationBitmap = new Bitmap(featureRasterLayer.XSize, featureRasterLayer.YSize);
         Graphics g      = Graphics.FromImage(classificationBitmap);
         int seed        = 0;
         int totalPixels = featureRasterLayer.XSize * featureRasterLayer.YSize;
         for (int i = 0; i < featureRasterLayer.XSize; i++)
         {
             for (int j = 0; j < featureRasterLayer.YSize; j++)
             {
                 //get the normalized input value at pixel (i, j)
                 double[] normal = pRasterLayerCursorTool.PickNormalValue(i, j);
                 var (action, q) = _dqn.ChooseAction(normal);
                 //convert the chosen action to a raw gray value
                 int gray         = _dqn.ActionToRawValue(NP.Argmax(action));
                 Color c          = Color.FromArgb(gray, gray, gray);
                 SolidBrush brush = new SolidBrush(c);
                 g.FillRectangle(brush, new Rectangle(i, j, 1, 1));
                 //report progress
                 Process = (double)(seed++) / totalPixels;
             }
         }
         //save result
         string fullFileName = Directory.GetCurrentDirectory() + @"\tmp\" + DateTime.Now.ToFileTimeUtc() + ".png";
         classificationBitmap.Save(fullFileName);
         //complete
         Summary  = "DQN training and classification complete";
         Complete = true;
         OnTaskComplete?.Invoke(Name, fullFileName);
     });
 }
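The constructor only builds the worker thread; nothing shown here starts it. A hypothetical usage sketch, assuming the job class exposes a Start() method that calls _t.Start() internally (no such method appears in the snippet, so the name is illustrative):

 // Hypothetical usage; Start() is an assumption, not part of the shown snippet.
 var job = new JobDQNClassify(featureRasterLayer, labelRasterLayer, epochs: 3000);
 job.OnTaskComplete += (name, resultFile) => Console.WriteLine($"{name} done -> {resultFile}");
 job.Start(); // presumably wraps _t.Start()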