/// <summary>
/// Computes the ideal state-value distribution by propagating values outward
/// from the node at <paramref name="_targetIndex"/>.
/// </summary>
/// <remarks>
/// Node indexes are visited cyclically, starting at <paramref name="_targetIndex"/>
/// and wrapping at the node count. Each visited node that is active and whose
/// StateValue is not the -1 sentinel pushes <c>Reward + gamma * value</c> to every
/// node listed in its <c>actions</c>, and is then removed from the list returned by
/// <c>BlackBord.GetActiveNodeIndexes()</c>. The loop ends when that list is empty.
/// The original author's note recommends a discount factor gamma of 0.9
/// (range 0.9 ~ 0.99).
/// NOTE(review): -1 appears to mean "no value assigned yet" - confirm against AINode.
/// </remarks>
/// <param name="_targetIndex">Index of the node the sweep starts from.</param>
public void CalculateStateValueDistribution(int _targetIndex)
{
    float gamma = BlackBord.Gamma;
    BetterList<int> allLeft = BlackBord.GetActiveNodeIndexes();
    int sum = BlackBord.GetNodesCount();
    int i = 0;

    // Number of consecutive visits that changed nothing (no StateValue written,
    // nothing removed from allLeft). Once this reaches a full cycle of `sum`
    // indexes the state is a fixed point: every further cycle would be identical,
    // so the remaining pending nodes can never be processed.
    int idleSteps = 0;

    while (allLeft.size > 0)
    {
        // Stop instead of spinning forever on pending nodes that are inactive or
        // never receive a value. Also covers sum <= 0, which would otherwise make
        // the modulo below throw DivideByZeroException.
        if (idleSteps >= sum)
        {
            break;
        }

        int index = (_targetIndex + i) % sum;
        i++;
        idleSteps++;

        AINode node = BlackBord.GetNode(index);
        if (!node.IsActived || node.StateValue == -1)
        {
            // Not ready yet; a later pass may find it with a value assigned.
            continue;
        }

        // For a node whose value is exactly 0 this reduces to its Reward
        // (gamma * 0); otherwise the node's current value is propagated.
        float nextStateValue = node.StateValue == 0 ? (node.Reward + gamma * node.StateValue) : node.StateValue;

        foreach (var act in node.actions)
        {
            AINode n = BlackBord.GetNode(act);
            float nsv = n.Reward + gamma * nextStateValue;

            // Only ever raise a neighbour's value, and never touch nodes with Reward >= 1.
            // NOTE(review): presumably Reward >= 1 marks the goal node - confirm.
            if (nsv > n.StateValue && n.Reward < 1)
            {
                n.StateValue = nsv;
                idleSteps = 0;
            }
        }

        // Already-processed nodes are revisited on later passes, so the removal
        // may be a no-op; only count it as progress when the list actually shrank.
        int pendingBefore = allLeft.size;
        allLeft.Remove(index);
        if (allLeft.size != pendingBefore)
        {
            idleSteps = 0;
        }
    }
}
/// <summary>
/// Populates <c>BlackBord.Nodes</c> with one new <c>AINode</c> per index,
/// from 0 up to (but not including) <c>BlackBord.GetNodesCount()</c>.
/// </summary>
public void InitGrid()
{
    // The count is read once up front, before any node is added.
    int total = BlackBord.GetNodesCount();
    int next = 0;

    while (next < total)
    {
        BlackBord.Nodes.Add(new AINode(next));
        next++;
    }
}