void init(PathPlanningGraph graph)
{
    _retrackLinkTable = new RetrackLinkList[graph.planningLength][];
    for (int t = 0; t < graph.planningLength; t++)
    {
        _retrackLinkTable[t] = new RetrackLinkList[graph[t].mNodes.Count];
        for (int i = 0; i < graph[t].mNodes.Count; i++)
        {
            _retrackLinkTable[t][i] = new RetrackLinkList();
            PlanningNode node = graph[t].mNodes[i];
            _retrackLinkTable[t][i].mNode = node;
        }
    }

    // init retrack list: link every node to each node at a later level
    // that occupies the same position
    for (int t1 = 0; t1 < graph.planningLength; t1++)
    {
        for (int t2 = t1 + 1; t2 < graph.planningLength; t2++)
        {
            for (int i1 = 0; i1 < graph[t1].mNodes.Count; i1++)
            {
                for (int i2 = 0; i2 < graph[t2].mNodes.Count; i2++)
                {
                    if (graph[t1].mNodes[i1].pos.X == graph[t2].mNodes[i2].pos.X &&
                        graph[t1].mNodes[i1].pos.Y == graph[t2].mNodes[i2].pos.Y)
                    {
                        RetrackLink newLink = new RetrackLink(t2, graph[t2].mNodes[i2]);
                        _retrackLinkTable[t1][i1].linkList.Add(newLink);
                    }
                }
            }
        }
    }
}
void UpdateRewardPesEst(Rewards reward, double[,] entropy, PathPlanningGraph graph, HexaPath path)
{
    int currentLen = path.Length;
    int totalLen = graph.planningLength;
    int num = graph[currentLen].mNodes.Count;
    for (int i = 0; i < num; i++)
    {
        PlanningNode node = graph[currentLen].mNodes[i];
        HexaPath newpath = new HexaPath();
        newpath.AddPos(node.pos);
        double[,] localEntropy = (double[,])entropy.Clone();

        // instant reward
        //reward.instantRewards[currentLen][i] = _agent.Score(newpath, entropy, graph);
        reward.instantRewards[currentLen][i] = GetEstimation(_agent, localEntropy, node.pos, _map);

        // future reward
        _agent.Update(newpath, localEntropy);
        Rewards newreward = new Rewards(reward);

        // update estimation
        UpdateEstimation(newreward, localEntropy, graph, currentLen + 1, totalLen - 1);

        // backtrack
        Backtrack(newreward, graph, totalLen - 1, currentLen + 1);

        HexaPath estPath = EstimatePath(graph, currentLen + 1, node.pos, newreward);
        double futureScore = 0.0;
        if (estPath.Length > 0)
        {
            futureScore = _agent.Score(estPath, localEntropy);
        }
        reward.futureRewards[currentLen][i] = futureScore;
        reward.totalRewards[currentLen][i] = reward.instantRewards[currentLen][i] + reward.futureRewards[currentLen][i];
    }
}
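// Sketch only: UpdateEstimation() is called by UpdateRewardPesEst() and
// UpdateRewardOptEst() but is not shown in this section. From the way
// Backtrack() consumes the Rewards structure, it is assumed to refresh the
// per-node instant rewards (and reset the future rewards) for every level
// in [fromLevel, toLevel]; the actual implementation may differ.
void UpdateEstimation(Rewards reward, double[,] entropy, PathPlanningGraph graph, int fromLevel, int toLevel)
{
    for (int t = fromLevel; t <= toLevel; t++)
    {
        for (int i = 0; i < graph[t].mNodes.Count; i++)
        {
            reward.instantRewards[t][i] = GetEstimation(_agent, entropy, graph[t].mNodes[i].pos, _map);
            reward.futureRewards[t][i] = 0.0;
            reward.totalRewards[t][i] = reward.instantRewards[t][i];
        }
    }
}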
HexaPath EstimatePath(PathPlanningGraph graph, int currentLevel, HexaPos lastPos, Rewards reward)
{
    HexaPath newpath = new HexaPath();
    int endLevel = graph.planningLength - 1;
    for (int t = currentLevel; t <= endLevel; t++)
    {
        PlanningNode lastNode = graph[t - 1].GetNode(lastPos);
        List<PlanningEdge> edges = graph[t - 1].GetEdges(lastNode);

        double maxVal = -0.01;
        HexaPos maxPos = null;
        List<PlanningEdge>.Enumerator e = edges.GetEnumerator();
        while (e.MoveNext())
        {
            int nextIdx = graph[t].GetIndex(e.Current.to);
            if (reward.totalRewards[t][nextIdx] > maxVal)
            {
                maxPos = graph[t].mNodes[nextIdx].pos;
                maxVal = reward.totalRewards[t][nextIdx];
            }
        }
        newpath.AddPos(maxPos);
        lastPos = maxPos;
    }
    return(newpath);
}
void UpdateNodeReward(ExpandingNode node, HexaPath path, double[,] entropy, PathPlanningGraph graph)
{
    PlanningNode planNode = node.planningNode;
    double[,] localEntropy = (double[,])entropy.Clone();

    node.instRwd = GetInstantReward(path, localEntropy, graph);
    node.futrRwd = GetEstimatedMaxFutureReward(planNode, path, localEntropy, graph);

    // update max val
    node.maxVal = node.instRwd + node.futrRwd;
}
ExpandingNode NodeSpanning(ExpandingTree tree, ExpandingNode node, HexaPath path, double[,] entropy, PathPlanningGraph graph, HexagonalMap map)
{
    PlanningNode planNode = node.planningNode;
    List<ExpandingNode> newGenerated = new List<ExpandingNode>();

    // find all child nodes
    int curLevel = path.Length - 1;
    if (curLevel < graph.planningLength - 1)
    {
        List<PlanningEdge> nextEdges = graph[curLevel].GetEdges(planNode);
        List<PlanningEdge>.Enumerator enumEd = nextEdges.GetEnumerator();
        while (enumEd.MoveNext())
        {
            ExpandingNode newNode = new ExpandingNode(enumEd.Current.to);
            tree.AddToParent(newNode, node);
            newGenerated.Add(newNode);

            // if new node is already end level,
            // set it as EXPANDED
            if (curLevel == graph.planningLength - 2)
            {
                newNode.state = ExpandingNode.STATE.EXPANDED;
            }
        }
    }

    // set node to EXPANDED
    node.state = ExpandingNode.STATE.EXPANDED;

    // update the newly generated nodes
    List<ExpandingNode>.Enumerator e2 = newGenerated.GetEnumerator();
    while (e2.MoveNext())
    {
        HexaPath tempPath = tree.GetPath(e2.Current);
        double[,] tempEntropy = (double[,])entropy.Clone();
        UpdateNodeReward(e2.Current, tempPath, tempEntropy, graph);
    }

    // find max node
    double maxNodeVal = 0.0;
    ExpandingNode maxNode = null;
    List<ExpandingNode>.Enumerator e3 = newGenerated.GetEnumerator();
    while (e3.MoveNext())
    {
        if (e3.Current.maxVal > maxNodeVal)
        {
            maxNode = e3.Current;
            maxNodeVal = e3.Current.maxVal;
        }
    }
    return(maxNode);
}
public PathPlanningGraph GetPathPlanningGraph(HexaPath path, int radius)
{
    int planningLength = path.Length;
    PathPlanningGraph planningGraph = new PathPlanningGraph(planningLength);

    // create vertex
    for (int t = 0; t < planningLength; t++)
    {
        HexaPos pivot = path[t];
        List<HexaPos> hexes = _topologicalGraph.GetMap().GetHexes(pivot.X, pivot.Y, radius, true);
        List<HexaPos>.Enumerator e = hexes.GetEnumerator();
        while (e.MoveNext())
        {
            Hex currentHex = _topologicalGraph.GetMap().GetHex(e.Current.X, e.Current.Y);
            if (false == _topologicalGraph.GetMap().MapState.IsObstacle(currentHex))
            {
                PlanningNode node = new PlanningNode(e.Current);
                planningGraph.AddPlanningNode(node, t);
            }
        }
    }

    // create edge
    for (int t = 0; t < planningLength - 1; t++)
    {
        LevelPartite currentPartite = planningGraph[t];
        LevelPartite nextPartite = planningGraph[t + 1];
        List<PlanningNode>.Enumerator e1 = currentPartite.mNodes.GetEnumerator();
        List<PlanningNode>.Enumerator e2 = nextPartite.mNodes.GetEnumerator();
        while (e1.MoveNext())
        {
            while (e2.MoveNext())
            {
                if (_topologicalGraph.IsConnected(e1.Current.pos, e2.Current.pos))
                {
                    currentPartite.Connect(e1.Current, e2.Current);
                }
            }
            e2 = nextPartite.mNodes.GetEnumerator();
        }
    }
    return(planningGraph);
}
double Backpropagation(int level, int currentNodeIdx, PathPlanningGraph graph, double[,] entropy)
{
    double[,] localEntropy = (double[,])entropy.Clone();
    HexaPath subpath = new HexaPath();
    subpath.AddPos(graph[level].mNodes[currentNodeIdx].pos);
    _agent.Update(subpath, localEntropy);

    int endLevel = graph.planningLength - 1;
    double[] estimatedReward = null;
    double[] futureReward = null;
    double[] instantReward = null;
    int nodeNum;
    int edgeNum;

    for (int l = endLevel; l >= level; l--)
    {
        nodeNum = graph[l].mNodes.Count;
        edgeNum = graph[l].mEdges.Count;
        instantReward = new double[nodeNum];
        futureReward = new double[nodeNum];
        for (int i = 0; i < nodeNum; i++)
        {
            PlanningNode node = graph[l].mNodes[i];
            instantReward[i] = GetEstimation(_agent, localEntropy, node.pos, _map);

            // the end level has no future reward; estimatedReward is only
            // allocated once the level below has been processed
            if (l < endLevel)
            {
                List<PlanningEdge> edges = graph[l].GetEdges(node);
                List<PlanningEdge>.Enumerator e = edges.GetEnumerator();
                while (e.MoveNext())
                {
                    int j = graph[l + 1].GetIndex(e.Current.to);
                    if (estimatedReward[j] > futureReward[i])
                    {
                        futureReward[i] = estimatedReward[j];
                    }
                }
            }
        }
        estimatedReward = new double[nodeNum];
        for (int i = 0; i < nodeNum; i++)
        {
            estimatedReward[i] = instantReward[i] + futureReward[i];
        }
    }
    return(futureReward[currentNodeIdx]);
}
HexaPath GetMaxPath(int level, PlanningNode startNode, PathPlanningGraph graph, double[][] estimatedReward)
{
    HexaPath path = new HexaPath();
    int endLevel = graph.planningLength - 1;

    //path.AddPos(startNode.pos);
    HexaPos lastPos = startNode.pos;
    for (int t = level + 1; t <= endLevel; t++)
    {
        int index = FindMax(lastPos, estimatedReward[t - level], t, graph);
        path.AddPos(graph[t].mNodes[index].pos);
        lastPos = path[path.Length - 1];
    }
    return(path);
}
double[] EstimateRewards(int level, PathPlanningGraph graph, double[,] entropy)
{
    double[,] localEntropy = (double[,])entropy.Clone();
    int nodeNum = graph[level].mNodes.Count;
    double[] estimatedReward = new double[nodeNum];
    double[] futureReward = new double[nodeNum];
    double[] instantReward = new double[nodeNum];
    for (int i = 0; i < nodeNum; i++)
    {
        PlanningNode node = graph[level].mNodes[i];
        instantReward[i] = GetEstimation(_agent, localEntropy, node.pos, _map);
        futureReward[i] = Backpropagation(level, i, graph, localEntropy);
        estimatedReward[i] = instantReward[i] + futureReward[i];
    }
    return(estimatedReward);
}
ExpandingTree GetExclusiveExpandingTree(PathPlanningGraph graph, HexaPos start)
{
    PlanningNode startNode = graph[0].GetNode(start);
    ExpandingNode root = new ExpandingNode(startNode);
    ExpandingTree expandingTree = new ExpandingTree(root);

    bool quit = false;
    for (int l = 0; l < graph.planningLength - 1; l++)
    {
        while (expandingTree.GetNewNodeCountByLevel(l) > 0)
        {
            quit = false;
            for (int i = 0; i < expandingTree.nodeList.Count && quit == false; i++)
            {
                ExpandingNode currentNode = expandingTree.nodeList[i];
                if (currentNode.level == l && currentNode.state == ExpandingNode.STATE.NEW)
                {
                    PlanningNode planNode = currentNode.planningNode;
                    List<PlanningEdge> edges = graph[l].GetEdges(planNode);
                    List<PlanningEdge>.Enumerator e2 = edges.GetEnumerator();
                    while (e2.MoveNext())
                    {
                        ExpandingNode newNode = new ExpandingNode(e2.Current.to);
                        expandingTree.AddToParent(newNode, currentNode);
                        if (l == graph.planningLength - 2)
                        {
                            newNode.state = ExpandingNode.STATE.EXPANDED;
                        }
                    }
                    currentNode.state = ExpandingNode.STATE.EXPANDED;
                    quit = true;
                }
            }
        }
    }
    return(expandingTree);
}
int FindMax(HexaPos currentPos, double[] values, int level, PathPlanningGraph graph)
{
    double refVal = -0.1;
    int maxIdx = 0;
    PlanningNode currentNode = graph[level - 1].GetNode(currentPos);
    List<PlanningEdge> edges = graph[level - 1].GetEdges(currentNode);
    List<PlanningEdge>.Enumerator e = edges.GetEnumerator();
    while (e.MoveNext())
    {
        int nextIdx = graph[level].GetIndex(e.Current.to);
        if (values[nextIdx] > refVal)
        {
            maxIdx = nextIdx;
            refVal = values[nextIdx];
        }
    }
    return(maxIdx);
}
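// Sketch only: the retrack-based Backpropagation() below also calls a
// one-argument FindMax(double[]) overload that is not shown in this section.
// It is assumed to return the index of the largest entry in the array.
int FindMax(double[] values)
{
    int maxIdx = 0;
    double maxVal = double.MinValue;
    for (int i = 0; i < values.Length; i++)
    {
        if (values[i] > maxVal)
        {
            maxVal = values[i];
            maxIdx = i;
        }
    }
    return(maxIdx);
}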
void Backtrack(Rewards reward, PathPlanningGraph graph, int fromLevel, int stopAt = 0)
{
    for (int t = fromLevel; t >= stopAt; t--)
    {
        int num = reward.totalRewards[t].Length;
        for (int i = 0; i < num; i++)
        {
            PlanningNode node = graph[t].mNodes[i];
            List<PlanningEdge> edges = graph[t].GetEdges(node);
            List<PlanningEdge>.Enumerator e = edges.GetEnumerator();
            while (e.MoveNext())
            {
                int j = graph[t + 1].GetIndex(e.Current.to);
                if (reward.totalRewards[t + 1][j] > reward.futureRewards[t][i])
                {
                    reward.futureRewards[t][i] = reward.totalRewards[t + 1][j];
                }
            }
            reward.totalRewards[t][i] = reward.instantRewards[t][i] + reward.futureRewards[t][i];
        }
    }
}
HexaPos GetMax(Rewards reward, PathPlanningGraph graph, HexaPath path)
{
    int pathLen = path.Length;
    HexaPos lastPos = path[pathLen - 1];
    PlanningNode lastNode = graph[pathLen - 1].GetNode(lastPos);
    List<PlanningEdge> edges = graph[pathLen - 1].GetEdges(lastNode);

    double maxVal = -0.01;
    HexaPos maxPos = null;
    List<PlanningEdge>.Enumerator e = edges.GetEnumerator();
    while (e.MoveNext())
    {
        int nextIdx = graph[pathLen].GetIndex(e.Current.to);
        if (reward.totalRewards[pathLen][nextIdx] > maxVal)
        {
            maxPos = graph[pathLen].mNodes[nextIdx].pos;
            maxVal = reward.totalRewards[pathLen][nextIdx];
        }
    }
    return(maxPos);
}
void UpdateRewardOptEst(Rewards reward, double[,] entropy, PathPlanningGraph graph, HexaPath path)
{
    int currentLen = path.Length;
    int totalLen = graph.planningLength;
    int num = graph[currentLen].mNodes.Count;
    for (int i = 0; i < num; i++)
    {
        PlanningNode node = graph[currentLen].mNodes[i];
        HexaPath newpath = new HexaPath();
        newpath.AddPos(node.pos);
        double[,] localEntropy = (double[,])entropy.Clone();

        _agent.Update(newpath, localEntropy);
        Rewards newreward = new Rewards(reward);

        // update estimation
        UpdateEstimation(newreward, localEntropy, graph, currentLen, totalLen - 1);

        // backtrack
        Backtrack(newreward, graph, totalLen - 2, currentLen);

        reward.instantRewards[currentLen][i] = newreward.instantRewards[currentLen][i];
        reward.futureRewards[currentLen][i] = newreward.futureRewards[currentLen][i];
        reward.totalRewards[currentLen][i] = newreward.totalRewards[currentLen][i];
    }
}
double[] Backpropagation(int level, PathPlanningGraph graph, double[,] entropy)
{
    double[,] localEntropy = (double[,])entropy.Clone();
    int endLevel = graph.planningLength - 1;
    double[][] estimatedReward = new double[graph.planningLength][];
    double[][] futureReward = new double[graph.planningLength][];
    double[][] instantReward = new double[graph.planningLength][];
    int nodeNum;
    int edgeNum;

    // init independent rewards
    double[][] independentReward = new double[graph.planningLength][];
    for (int l = 0; l < graph.planningLength; l++)
    {
        independentReward[l] = new double[graph[l].mNodes.Count];
        estimatedReward[l] = new double[graph[l].mNodes.Count];
        futureReward[l] = new double[graph[l].mNodes.Count];
        instantReward[l] = new double[graph[l].mNodes.Count];
        for (int i = 0; i < graph[l].mNodes.Count; i++)
        {
            independentReward[l][i] = GetEstimation(_agent, localEntropy, graph[l].mNodes[i].pos, _map);
            instantReward[l][i] = independentReward[l][i];
        }
    }

    bool stop = false;
    while (stop == false)
    {
        stop = true;
        for (int l = endLevel; l >= level; l--)
        {
            nodeNum = graph[l].mNodes.Count;
            edgeNum = graph[l].mEdges.Count;
            for (int i = 0; i < nodeNum; i++)
            {
                double estRwd = 0.0;
                double insRwd = instantReward[l][i];
                double futRwd = 0.0;
                PlanningNode node = graph[l].mNodes[i];
                // the end level has no outgoing edges and therefore no future reward
                if (l < endLevel)
                {
                    List<PlanningEdge> edges = graph[l].GetEdges(node);
                    List<PlanningEdge>.Enumerator e = edges.GetEnumerator();
                    while (e.MoveNext())
                    {
                        int j = graph[l + 1].GetIndex(e.Current.to);
                        if (estimatedReward[l + 1][j] > futRwd)
                        {
                            futRwd = estimatedReward[l + 1][j];
                        }
                    }
                }
                if (futRwd != futureReward[l][i])
                {
                    futureReward[l][i] = futRwd;
                    stop = false;
                }
                estimatedReward[l][i] = instantReward[l][i] + futureReward[l][i];
            }

            // find max node and back feed: for the max node, suppress the instant
            // reward of its same-position nodes at later levels; for every other
            // node, restore its linked nodes to their independent rewards
            int maxIdx = FindMax(estimatedReward[l]);
            for (int i = 0; i < nodeNum; i++)
            {
                if (i == maxIdx)
                {
                    List<RetrackLink>.Enumerator eR = _retrackLinkTable[l][maxIdx].linkList.GetEnumerator();
                    while (eR.MoveNext())
                    {
                        int nodeLevel = eR.Current.level;
                        int nodeIdx = graph[nodeLevel].GetIndex(eR.Current.retrackNode);
                        instantReward[nodeLevel][nodeIdx] = 0;
                    }
                }
                else
                {
                    List<RetrackLink>.Enumerator eR = _retrackLinkTable[l][i].linkList.GetEnumerator();
                    while (eR.MoveNext())
                    {
                        int nodeLevel = eR.Current.level;
                        int nodeIdx = graph[nodeLevel].GetIndex(eR.Current.retrackNode);
                        instantReward[nodeLevel][nodeIdx] = independentReward[nodeLevel][nodeIdx];
                    }
                }
            }
        }
    }

    Console.WriteLine("IBP RETRK at level " + level.ToString());
    for (int i = 0; i < estimatedReward[level].Length; i++)
    {
        int posX = graph[level].mNodes[i].pos.X;
        int posY = graph[level].mNodes[i].pos.Y;
        Console.WriteLine("Pos[" + posX.ToString() + "," + posY.ToString() + "]=" + estimatedReward[level][i].ToString());
    }
    return((double[])estimatedReward[level].Clone());
}
public override HexaPath FindPath(PathPlanningGraph graph, HexaPos start)
{
    int planningLength = graph.planningLength;
    _estimated = new Rewards(graph);

    HexaPath path = new HexaPath();

    _optEstimated = new Rewards(_estimated);
    _pesEstimated = new Rewards(_estimated);

    double[,] optEntropy = (double[,])(_localEntropy.Clone());
    double[,] pesEntropy = (double[,])(_localEntropy.Clone());

    HexaPath optMaxPath = new HexaPath();
    HexaPath pesMaxPath = new HexaPath();
    optMaxPath.AddPos(start);
    pesMaxPath.AddPos(start);

    for (int t = 1; t < planningLength; t++)
    {
        // get path for opt
        _agent.Update(optMaxPath, optEntropy);
        UpdateRewardOptEst(_optEstimated, optEntropy, graph, optMaxPath);
        HexaPos nextOptPos = GetMax(_optEstimated, graph, optMaxPath);
        optMaxPath.AddPos(nextOptPos);

        // get path for pes
        _agent.Update(pesMaxPath, pesEntropy);
        UpdateRewardPesEst(_pesEstimated, pesEntropy, graph, pesMaxPath);
        HexaPos nextPesPos = GetMax(_pesEstimated, graph, pesMaxPath);
        pesMaxPath.AddPos(nextPesPos);
    }

    double optMaxScore = _agent.Score(optMaxPath, _localEntropy);
    double pesMaxScore = _agent.Score(pesMaxPath, _localEntropy);

    int maxTryCnt = 10;
    bool converged = false;
    int tryCnt = 0;
    while (converged == false && tryCnt <= maxTryCnt)
    {
        tryCnt++;
        if (pesMaxScore >= optMaxScore)
        {
            path = pesMaxPath;
            converged = true;
        }
        else
        {
            path = optMaxPath;

            // correct the estimation at step t
            int diffFrom = pesMaxPath.DifferentAt(optMaxPath);
            HexaPath subpath = optMaxPath.SubPath(diffFrom, optMaxPath.Length - 1);
            PlanningNode diffNode = graph[diffFrom].GetNode(subpath[0]);
            int diffIdx = graph[diffFrom].GetIndex(diffNode);
            HexaPath prevPath = optMaxPath.SubPath(0, diffFrom - 1);

            double[,] tempEntropy = (double[,])(_localEntropy.Clone());
            _agent.Update(prevPath, tempEntropy);
            _pesEstimated.totalRewards[diffFrom][diffIdx] = _agent.Score(subpath, tempEntropy);

            HexaPath newCandidatePath = new HexaPath();
            newCandidatePath.AddPos(start);
            HexaPath newSubCandidate = EstimatePath(graph, 1, start, _pesEstimated);
            newCandidatePath.Merge(newSubCandidate);

            double newCandidateScore = _agent.Score(newSubCandidate, _localEntropy);
            if (newCandidateScore <= pesMaxScore)
            {
                converged = true;
            }
            else
            {
                pesMaxScore = newCandidateScore;
                pesMaxPath = newCandidatePath;
            }
        }
    }
    return(path);
}
double GetEstimatedMaxFutureReward(PlanningNode node, HexaPath path, double[,] entropy, PathPlanningGraph graph)
{
    int endLevel = graph.planningLength - 1;
    int currentLevel = path.Length - 1;
    if (endLevel == currentLevel)
    {
        return(0.0);
    }

    double maxFutureScore = 0.0;

    // backtrack
    // start from end level, init future score as 0
    int nodeNum = 0;
    double[] futureScore = null;
    double[] instantScore = null;
    double[] totalScore = null;
    for (int l = endLevel; l > currentLevel; l--)
    {
        nodeNum = graph[l].mNodes.Count;
        futureScore = new double[nodeNum];
        instantScore = new double[nodeNum];
        for (int i = 0; i < nodeNum; i++)
        {
            PlanningNode tempNode = graph[l].mNodes[i];
            instantScore[i] = GetEstimation(_agent, entropy, tempNode.pos, _map);
            if (l < endLevel)
            {
                List<PlanningEdge> edges = graph[l].GetEdges(tempNode);
                List<PlanningEdge>.Enumerator e = edges.GetEnumerator();
                while (e.MoveNext())
                {
                    int j = graph[l + 1].GetIndex(e.Current.to);
                    if (totalScore[j] > futureScore[i])
                    {
                        futureScore[i] = totalScore[j];
                    }
                }
            }
            else
            {
                futureScore[i] = 0.0;
            }
        }
        totalScore = new double[nodeNum];
        for (int i = 0; i < nodeNum; i++)
        {
            totalScore[i] = instantScore[i] + futureScore[i];
        }
    }

    // estimate future reward
    HexaPos currentPos = node.pos;
    List<PlanningEdge> nextEdges = graph[currentLevel].GetEdges(node);
    List<PlanningEdge>.Enumerator enumEdge = nextEdges.GetEnumerator();
    while (enumEdge.MoveNext())
    {
        int j = graph[currentLevel + 1].GetIndex(enumEdge.Current.to);
        if (totalScore[j] > maxFutureScore)
        {
            maxFutureScore = totalScore[j];
        }
    }
    return(maxFutureScore);
}
public override HexaPath FindPath(PathPlanningGraph graph, HexaPos start)
{
    HexaPath path = null;
    double currentScore = 0.0;

    PlanningNode startNode = graph[0].GetNode(start);
    ExpandingNode root = new ExpandingNode(startNode);
    ExpandingTree expandingTree = new ExpandingTree(root);

    List<double> scoreList = new List<double>();
    bool exhaustivelyEnumerated = false;
    bool stopCriteria = false;
    int counter = 0;

    HexaPath maxPath = null;
    double maxScore = 0.0;
    do
    {
        path = ExpandToFindPath(expandingTree, graph, _localEntropy);
        if (path == null)
        {
            stopCriteria = true;
        }
        else
        {
            currentScore = ScorePath(_agent, _localEntropy, path);
            if (currentScore > maxScore)
            {
                maxScore = currentScore;
                maxPath = path;
            }
        }
        scoreList.Add(currentScore);
        expandingTree.Freeze(maxScore);

        if (counter == 0)
        {
            scoreAtFirstRun = currentScore;
        }

        //expandingTree.Draw("Expanding-Tree-" + counter.ToString());
        counter++;
        Console.WriteLine(counter + ", " + currentScore + ", " + maxScore + ", " + expandingTree.nodeNum);
    }
    while ((iteratingOnce == false || exhaustivelyEnumerated == true) && (stopCriteria == false));

    totalRunTime = scoreList.Count;
    finalMaxScore = maxScore;
    hitOptimalRunTime = FindMaxScoreIndex(scoreList, maxScore);

    //expandingTree.Draw("Expanding-Tree-N");
    Console.WriteLine("The number of nodes expanded is " + expandingTree.nodeNum);
    exploredSize = expandingTree.nodeNum;
    Console.WriteLine("The number of complete expanding nodes is " + graph.GetExpandingNodeNumber());
    problemSize = graph.GetExpandingNodeNumber();

    return(maxPath);
}
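// Sketch only: FindMaxScoreIndex() is referenced by FindPath() above but is
// not defined in this section. From its use (recording hitOptimalRunTime) it
// is assumed to return the iteration index at which the maximum score was
// first reached.
int FindMaxScoreIndex(List<double> scores, double maxScore)
{
    for (int i = 0; i < scores.Count; i++)
    {
        if (scores[i] >= maxScore)
        {
            return(i);
        }
    }
    return(scores.Count - 1);
}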
public RetrackLink(int lvl, PlanningNode node)
{
    level = lvl;
    retrackNode = node;
}
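// Sketch only: the RetrackLinkList container is not defined in this section.
// From its use in init() and the retrack-based Backpropagation() (fields
// mNode and linkList), it is assumed to pair a planning node with the links
// pointing at same-position nodes on later levels.
class RetrackLinkList
{
    public PlanningNode mNode;                                    // node this entry belongs to
    public List<RetrackLink> linkList = new List<RetrackLink>();  // same-position nodes at later levels
}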
double Backpropagation(int level, int currentNodeIdx, PathPlanningGraph graph, double[,] entropy)
{
    double[,] localEntropy = (double[,])entropy.Clone();
    HexaPath subpath = new HexaPath();
    PlanningNode startNode = graph[level].mNodes[currentNodeIdx];
    subpath.AddPos(startNode.pos);
    _agent.Update(subpath, localEntropy);

    int endLevel = graph.planningLength - 1;
    int nodeNum;
    int edgeNum;
    double[][] estimatedReward = new double[endLevel - level + 1][];
    double[][] futureReward = new double[endLevel - level + 1][];
    double[][] instantReward = new double[endLevel - level + 1][];
    for (int l = level; l <= endLevel; l++)
    {
        nodeNum = graph[l].mNodes.Count;
        estimatedReward[l - level] = new double[nodeNum];
        futureReward[l - level] = new double[nodeNum];
        instantReward[l - level] = new double[nodeNum];
        for (int i = 0; i < nodeNum; i++)
        {
            PlanningNode node = graph[l].mNodes[i];
            instantReward[l - level][i] = GetEstimation(_agent, localEntropy, node.pos, _map);
        }
    }

    nodeNum = graph[endLevel].mNodes.Count;
    for (int i = 0; i < nodeNum; i++)
    {
        estimatedReward[endLevel - level][i] = instantReward[endLevel - level][i];
    }

    for (int l = endLevel - 1; l >= level; l--)
    {
        nodeNum = graph[l].mNodes.Count;
        edgeNum = graph[l].mEdges.Count;
        for (int i = 0; i < nodeNum; i++)
        {
            PlanningNode node = graph[l].mNodes[i];
            List<PlanningEdge> edges = graph[l].GetEdges(node);
            List<PlanningEdge>.Enumerator e = edges.GetEnumerator();
            while (e.MoveNext())
            {
                int j = graph[l + 1].GetIndex(e.Current.to);
                if (estimatedReward[l - level + 1][j] > futureReward[l - level][i])
                {
                    futureReward[l - level][i] = estimatedReward[l - level + 1][j];
                }
            }
        }
        for (int i = 0; i < nodeNum; i++)
        {
            estimatedReward[l - level][i] = instantReward[l - level][i] + futureReward[l - level][i];
        }
    }

    // refine and return the best estimatedReward[level+1][]
    double maxActualReward = 0.0;
    //int maxIdx = GetMaxIdx(futureReward[0]);
    //double maxExpReward = futureReward[0][maxIdx];
    //while (maxActualReward < maxExpReward)
    {
        HexaPath localMaxPath = GetMaxPath(level, startNode, graph, estimatedReward);
        maxActualReward = _agent.Score(localMaxPath, localEntropy);
        //futureReward[0][maxIdx] = maxActualReward;
        //maxIdx = GetMaxIdx(futureReward[0]);
        //maxExpReward = futureReward[0][maxIdx];
    }
    return(maxActualReward); // futureReward[0][currentNodeIdx];
}