C# (CSharp) Assets.Scripts.IAJ.Unity.DecisionMaking.MCTS Reward示例

编程语言: C# (CSharp)

命名空间/包名称: Assets.Scripts.IAJ.Unity.DecisionMaking.MCTS

类/类型: Reward

hotexamples.com的示例: 2

C# (CSharp) Assets.Scripts.IAJ.Unity.DecisionMaking.MCTS Reward - 已找到2个示例。以下是从开源项目中提取的、评价最高的 Assets.Scripts.IAJ.Unity.DecisionMaking.MCTS.Reward 真实 C# (CSharp) 代码示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

GetRewardForNode(3)

示例#1

显示文件

        /// <summary>
        /// Simulates a playout from a copy of <paramref name="initialPlayoutState"/>,
        /// choosing each step's action at random with probability proportional to
        /// e^H(action) among the actions that can currently execute.
        /// </summary>
        /// <param name="initialPlayoutState">
        /// State to start from; it is not modified, a child world model is generated and advanced instead.
        /// </param>
        /// <returns>
        /// A <see cref="Reward"/> carrying the next player and the score of the world model
        /// at the point where the playout stopped.
        /// </returns>
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel model = initialPlayoutState.GenerateChildWorldModel();

            // The candidate list is fetched once; executability is re-checked every step.
            List<GOB.Action> actions = model.GetActions();
            List<GOB.Action> executableActions = new List<GOB.Action>();
            List<double> weights = new List<double>();

            while (!model.IsTerminal())
            {
                if (actions.Count == 0)
                {
                    break;
                }

                executableActions.Clear();
                weights.Clear();
                double totalWeight = 0;

                foreach (GOB.Action action in actions)
                {
                    if (action.CanExecute(model))
                    {
                        // Evaluate H once per action and keep the weight for the sampling pass.
                        double weight = Math.Exp(action.H(model));
                        executableActions.Add(action);
                        weights.Add(weight);
                        totalWeight += weight;
                    }
                }

                // Nothing can run in this state: end the playout instead of re-applying
                // an action selected in a previous iteration.
                if (executableActions.Count == 0)
                {
                    break;
                }

                // Roulette-wheel selection over the cumulative weights. Unlike a dictionary
                // keyed by probability, this keeps actions that share the same weight.
                // NOTE(review): assumes GetRandomNumber(min, max) returns a uniformly
                // distributed double within [min, max] - confirm against its definition.
                double threshold = GetRandomNumber(0.0, totalWeight);

                // Default to the last candidate so rounding at the upper bound still selects one.
                GOB.Action nextAction = executableActions[executableActions.Count - 1];
                double cumulativeWeight = 0;

                for (int i = 0; i < executableActions.Count; i++)
                {
                    cumulativeWeight += weights[i];
                    if (threshold < cumulativeWeight)
                    {
                        nextAction = executableActions[i];
                        break;
                    }
                }

                nextAction.ApplyActionEffects(model);
                model.CalculateNextPlayer();
            }

            Reward reward = new Reward();
            reward.PlayerID = model.GetNextPlayer();
            reward.Value    = model.GetScore();
            return reward;
        }

示例#2

显示文件

文件： MCTS.cs 项目： hlferreira/IAJ

 /// <summary>
 /// Placeholder for the backpropagation step; it is not implemented yet and always throws.
 /// Declared virtual, so derived classes can supply their own implementation.
 /// </summary>
 /// <param name="node">Node passed in by the caller (presumably where backpropagation starts - confirm).</param>
 /// <param name="reward">Reward passed in by the caller (presumably the playout result to propagate - confirm).</param>
 /// <exception cref="NotImplementedException">Always thrown by this base implementation.</exception>
 protected virtual void Backpropagate(MCTSNode node, Reward reward)
 {
     // TODO: implement the backpropagation logic; until then any call fails fast.
     throw new NotImplementedException();
 }