/// <summary>
/// Builds <c>_retrackLinkTable</c> for <paramref name="graph"/>: one <c>RetrackLinkList</c> per
/// planning node (indexed [level][node index]), each holding links to every node on a later
/// level that has the same X/Y position.
/// </summary>
/// <param name="graph">Planning graph whose levels and nodes are scanned.</param>
void init(PathPlanningGraph graph)
 {
     int levelCount = graph.planningLength;

     // Pass 1: allocate the table and tag every entry with the node it describes.
     _retrackLinkTable = new RetrackLinkList[levelCount][];
     for (int level = 0; level < levelCount; level++)
     {
         int nodeCount = graph[level].mNodes.Count;
         _retrackLinkTable[level] = new RetrackLinkList[nodeCount];
         for (int idx = 0; idx < nodeCount; idx++)
         {
             RetrackLinkList entry = new RetrackLinkList();
             entry.mNode = graph[level].mNodes[idx];
             _retrackLinkTable[level][idx] = entry;
         }
     }

     // Pass 2: for every pair (earlier level, later level) link nodes that share a position.
     // Links are appended in (later level, later node index) order for each source node.
     for (int earlier = 0; earlier < levelCount; earlier++)
     {
         for (int later = earlier + 1; later < levelCount; later++)
         {
             for (int a = 0; a < graph[earlier].mNodes.Count; a++)
             {
                 PlanningNode source = graph[earlier].mNodes[a];
                 for (int b = 0; b < graph[later].mNodes.Count; b++)
                 {
                     PlanningNode revisit = graph[later].mNodes[b];
                     bool samePosition = source.pos.X == revisit.pos.X
                                         && source.pos.Y == revisit.pos.Y;
                     if (!samePosition)
                     {
                         continue;
                     }

                     _retrackLinkTable[earlier][a].linkList.Add(new RetrackLink(later, revisit));
                 }
             }
         }
     }
 }
        /// <summary>
        /// Fills the instant, future and total reward entries of <paramref name="reward"/> for every
        /// node on level <c>path.Length</c>. The future reward is the agent's score of a greedy path
        /// estimated from a per-node copy of the reward tables.
        /// </summary>
        /// <param name="reward">Reward tables updated in place at level <c>path.Length</c>.</param>
        /// <param name="entropy">Entropy map; it is cloned per node and the original is not passed on.</param>
        /// <param name="graph">Planning graph being searched.</param>
        /// <param name="path">Path chosen so far; its length selects the level to evaluate.</param>
        void UpdateRewardPesEst(Rewards reward, double[,] entropy, PathPlanningGraph graph, HexaPath path)
        {
            int level     = path.Length;
            int lastLevel = graph.planningLength - 1;
            int nodeCount = graph[level].mNodes.Count;

            for (int idx = 0; idx < nodeCount; idx++)
            {
                PlanningNode candidate = graph[level].mNodes[idx];

                // Each candidate is evaluated against its own copy of the entropy map.
                double[,] scratch = (double[,])entropy.Clone();

                HexaPath singleStep = new HexaPath();
                singleStep.AddPos(candidate.pos);

                // Instant reward comes from GetEstimation (an earlier variant, left commented out in
                // the original, scored the one-step path through _agent.Score instead).
                reward.instantRewards[level][idx] = GetEstimation(_agent, scratch, candidate.pos, _map);

                // Future reward: hand the step and the entropy copy to the agent, then estimate and
                // backtrack the remaining levels on a copy of the reward tables.
                _agent.Update(singleStep, scratch);
                Rewards lookahead = new Rewards(reward);
                UpdateEstimation(lookahead, scratch, graph, level + 1, lastLevel);
                Backtrack(lookahead, graph, lastLevel, level + 1);

                HexaPath projected = EstimatePath(graph, level + 1, candidate.pos, lookahead);

                // An empty projected path contributes no future reward.
                reward.futureRewards[level][idx] = projected.Length > 0
                                                   ? _agent.Score(projected, scratch)
                                                   : 0.0;
                reward.totalRewards[level][idx] = reward.instantRewards[level][idx]
                                                  + reward.futureRewards[level][idx];
            }
        }
        /// <summary>
        /// Greedily builds a path from <paramref name="currentLevel"/> up to the last planning level:
        /// at each level it follows the outgoing edge of the previous position whose target has the
        /// largest entry in <c>reward.totalRewards</c> (only values above -0.01 are considered).
        /// </summary>
        /// <param name="graph">Planning graph to walk.</param>
        /// <param name="currentLevel">First level to add to the path; edges are read from level - 1.</param>
        /// <param name="lastPos">Position occupied on level <c>currentLevel - 1</c>.</param>
        /// <param name="reward">Reward tables supplying <c>totalRewards[level][index]</c>.</param>
        /// <returns>
        /// The positions chosen for levels <paramref name="currentLevel"/> onward. The path is shorter
        /// than the remaining horizon (possibly empty) when a position has no usable successor.
        /// </returns>
        HexaPath EstimatePath(PathPlanningGraph graph, int currentLevel, HexaPos lastPos, Rewards reward)
        {
            HexaPath newpath  = new HexaPath();
            int      endLevel = graph.planningLength - 1;

            for (int t = currentLevel; t <= endLevel; t++)
            {
                PlanningNode lastNode = graph[t - 1].GetNode(lastPos);

                double  maxVal = -0.01;
                HexaPos maxPos = null;
                bool    found  = false;

                foreach (PlanningEdge edge in graph[t - 1].GetEdges(lastNode))
                {
                    int    nextIdx   = graph[t].GetIndex(edge.to);
                    double candidate = reward.totalRewards[t][nextIdx];
                    if (candidate > maxVal)
                    {
                        maxPos = graph[t].mNodes[nextIdx].pos;
                        maxVal = candidate;
                        found  = true;
                    }
                }

                if (!found)
                {
                    // Dead end: no outgoing edge (or none above the threshold). Previously a null
                    // position was appended and then looked up on the next level; stop here instead
                    // and return the partial path.
                    break;
                }

                newpath.AddPos(maxPos);
                lastPos = maxPos;
            }

            return(newpath);
        }
Пример #4
0
        /// <summary>
        /// Computes the instant and estimated future reward of <paramref name="node"/> for the given
        /// <paramref name="path"/> and stores them, together with their sum, on the node.
        /// </summary>
        /// <param name="node">Expanding-tree node whose <c>instRwd</c>, <c>futrRwd</c> and <c>maxVal</c> are set.</param>
        /// <param name="path">Path associated with the node.</param>
        /// <param name="entropy">Entropy map; a clone is handed to the reward helpers.</param>
        /// <param name="graph">Planning graph being searched.</param>
        void UpdateNodeReward(ExpandingNode node, HexaPath path, double[,] entropy, PathPlanningGraph graph)
        {
            // Both helpers receive the same private copy of the entropy map.
            double[,] scratch = (double[,])entropy.Clone();

            node.instRwd = GetInstantReward(path, scratch, graph);
            node.futrRwd = GetEstimatedMaxFutureReward(node.planningNode, path, scratch, graph);

            // The node's value is the sum of what it earns now and the estimated future reward.
            node.maxVal = node.instRwd + node.futrRwd;
        }
Пример #5
0
        /// <summary>
        /// Expands <paramref name="node"/> by one level: creates a child for every outgoing planning
        /// edge, marks the node as EXPANDED, evaluates the rewards of the new children and returns the
        /// child with the largest <c>maxVal</c>.
        /// </summary>
        /// <param name="tree">Tree that receives the new children.</param>
        /// <param name="node">Node to expand.</param>
        /// <param name="path">Path leading to <paramref name="node"/>; its length minus one is the node's level.</param>
        /// <param name="entropy">Entropy map; cloned for each child before evaluation.</param>
        /// <param name="graph">Planning graph supplying the edges.</param>
        /// <param name="map">Not used by this method.</param>
        /// <returns>
        /// The child whose <c>maxVal</c> is largest and strictly greater than 0, or null when no child
        /// was created or none has a positive value.
        /// </returns>
        ExpandingNode NodeSpanning(ExpandingTree tree, ExpandingNode node, HexaPath path, double[,] entropy, PathPlanningGraph graph, HexagonalMap map)
        {
            int level     = path.Length - 1;
            int lastLevel = graph.planningLength - 1;

            List<ExpandingNode> children = new List<ExpandingNode>();

            if (level < lastLevel)
            {
                // Children created from the second-to-last level sit on the last level and have
                // nothing left to expand, so they are flagged EXPANDED immediately.
                bool childrenAreTerminal = (level == lastLevel - 1);

                foreach (PlanningEdge edge in graph[level].GetEdges(node.planningNode))
                {
                    ExpandingNode child = new ExpandingNode(edge.to);
                    tree.AddToParent(child, node);
                    children.Add(child);

                    if (childrenAreTerminal)
                    {
                        child.state = ExpandingNode.STATE.EXPANDED;
                    }
                }
            }

            node.state = ExpandingNode.STATE.EXPANDED;

            // Evaluate every new child against its own copy of the entropy map.
            foreach (ExpandingNode child in children)
            {
                UpdateNodeReward(child, tree.GetPath(child), (double[,])entropy.Clone(), graph);
            }

            // Pick the most valuable child; the 0.0 floor means non-positive children are never chosen.
            ExpandingNode best    = null;
            double        bestVal = 0.0;
            foreach (ExpandingNode child in children)
            {
                if (child.maxVal > bestVal)
                {
                    best    = child;
                    bestVal = child.maxVal;
                }
            }

            return(best);
        }
Пример #6
0
        /// <summary>
        /// Builds a levelled planning graph around <paramref name="path"/>: level t contains every
        /// non-obstacle hex returned by the map's <c>GetHexes</c> for the t-th path position and
        /// <paramref name="radius"/>, and consecutive levels are connected wherever the topological
        /// graph reports the two positions as connected.
        /// </summary>
        /// <param name="path">Reference path; one graph level is created per path position.</param>
        /// <param name="radius">Radius passed to <c>GetHexes</c> for each pivot position.</param>
        /// <returns>The populated planning graph.</returns>
        public PathPlanningGraph GetPathPlanningGraph(HexaPath path, int radius)
        {
            int levelCount = path.Length;
            PathPlanningGraph result = new PathPlanningGraph(levelCount);

            // Vertices: every non-obstacle hex around each pivot.
            for (int level = 0; level < levelCount; level++)
            {
                HexaPos center = path[level];

                foreach (HexaPos cell in _topologicalGraph.GetMap().GetHexes(center.X, center.Y, radius, true))
                {
                    Hex hex = _topologicalGraph.GetMap().GetHex(cell.X, cell.Y);
                    if (_topologicalGraph.GetMap().MapState.IsObstacle(hex) == false)
                    {
                        result.AddPlanningNode(new PlanningNode(cell), level);
                    }
                }
            }

            // Edges: test every (node on level, node on level + 1) pair for connectivity.
            for (int level = 0; level < levelCount - 1; level++)
            {
                LevelPartite current = result[level];
                LevelPartite next    = result[level + 1];

                foreach (PlanningNode source in current.mNodes)
                {
                    foreach (PlanningNode target in next.mNodes)
                    {
                        if (_topologicalGraph.IsConnected(source.pos, target.pos))
                        {
                            current.Connect(source, target);
                        }
                    }
                }
            }

            return(result);
        }
Пример #7
0
        /// <summary>
        /// Estimates the future reward of node <paramref name="currentNodeIdx"/> on
        /// <paramref name="level"/> by sweeping backwards from the last level: each node's future
        /// reward is the largest estimated (instant + future) reward among its edge targets on the
        /// next level, never less than 0.
        /// </summary>
        /// <param name="level">Level of the node being evaluated; the sweep stops here.</param>
        /// <param name="currentNodeIdx">Index of the node within <c>graph[level].mNodes</c>.</param>
        /// <param name="graph">Planning graph being searched.</param>
        /// <param name="entropy">Entropy map; a clone is used and handed to the agent.</param>
        /// <returns>The future reward computed for the node at <paramref name="level"/>.</returns>
        double Backpropagation(int level, int currentNodeIdx, PathPlanningGraph graph, double[,] entropy)
        {
            double[,] localEntropy = (double[,])entropy.Clone();

            // Hand the visit of the start node to the agent before estimating rewards.
            HexaPath subpath = new HexaPath();
            subpath.AddPos(graph[level].mNodes[currentNodeIdx].pos);
            _agent.Update(subpath, localEntropy);

            int endLevel = graph.planningLength - 1;

            // estimatedReward always holds the totals of the level processed just before (l + 1).
            double[] estimatedReward = null;
            double[] futureReward    = null;

            for (int l = endLevel; l >= level; l--)
            {
                int      nodeNum      = graph[l].mNodes.Count;
                double[] levelTotals  = new double[nodeNum];

                futureReward = new double[nodeNum];

                for (int i = 0; i < nodeNum; i++)
                {
                    PlanningNode node          = graph[l].mNodes[i];
                    double       instantReward = GetEstimation(_agent, localEntropy, node.pos, _map);

                    // The last level has no successor level: estimatedReward is still null and
                    // graph[l + 1] does not exist, so edges are only followed below it. This mirrors
                    // the guard used by GetEstimatedMaxFutureReward.
                    if (l < endLevel)
                    {
                        foreach (PlanningEdge edge in graph[l].GetEdges(node))
                        {
                            int j = graph[l + 1].GetIndex(edge.to);
                            if (estimatedReward[j] > futureReward[i])
                            {
                                futureReward[i] = estimatedReward[j];
                            }
                        }
                    }

                    levelTotals[i] = instantReward + futureReward[i];
                }

                estimatedReward = levelTotals;
            }

            return(futureReward[currentNodeIdx]);
        }
        /// <summary>
        /// Follows, level by level, the index chosen by <c>FindMax</c> starting after
        /// <paramref name="startNode"/>, and returns the resulting positions. The start node itself
        /// is not part of the returned path.
        /// </summary>
        /// <param name="level">Level of <paramref name="startNode"/>.</param>
        /// <param name="startNode">Node the walk starts from.</param>
        /// <param name="graph">Planning graph being searched.</param>
        /// <param name="estimatedReward">Per-level rewards, indexed by (graph level - <paramref name="level"/>).</param>
        /// <returns>Positions for levels <c>level + 1</c> through the last level.</returns>
        HexaPath GetMaxPath(int level, PlanningNode startNode, PathPlanningGraph graph, double[][] estimatedReward)
        {
            HexaPath best      = new HexaPath();
            HexaPos  cursor    = startNode.pos;
            int      lastLevel = graph.planningLength - 1;

            int t = level + 1;
            while (t <= lastLevel)
            {
                // Reward rows are stored relative to the start level.
                double[] levelRewards = estimatedReward[t - level];
                int      pick         = FindMax(cursor, levelRewards, t, graph);

                best.AddPos(graph[t].mNodes[pick].pos);
                cursor = best[best.Length - 1];
                t++;
            }

            return(best);
        }
Пример #9
0
        /// <summary>
        /// Returns, for every node on <paramref name="level"/>, the sum of its instant estimation and
        /// the future reward obtained from <c>Backpropagation</c>.
        /// </summary>
        /// <param name="level">Level whose nodes are evaluated.</param>
        /// <param name="graph">Planning graph being searched.</param>
        /// <param name="entropy">Entropy map; a clone is used for all evaluations.</param>
        /// <returns>One estimated reward per node, in <c>graph[level].mNodes</c> order.</returns>
        double[] EstimateRewards(int level, PathPlanningGraph graph, double[,] entropy)
        {
            double[,] localEntropy = (double[,])entropy.Clone();

            int      count  = graph[level].mNodes.Count;
            double[] totals = new double[count];

            for (int i = 0; i < count; i++)
            {
                PlanningNode node = graph[level].mNodes[i];

                double instant = GetEstimation(_agent, localEntropy, node.pos, _map);
                double future  = Backpropagation(level, i, graph, localEntropy);

                totals[i] = instant + future;
            }

            return(totals);
        }
Пример #10
0
        /// <summary>
        /// Builds an expanding tree rooted at <paramref name="start"/> by expanding, level by level,
        /// every node that is still in the NEW state along all of its outgoing planning edges.
        /// </summary>
        /// <param name="graph">Planning graph supplying nodes and edges.</param>
        /// <param name="start">Position of the root node on level 0.</param>
        /// <returns>The fully expanded tree.</returns>
        ExpandingTree GetExclusiveExpandingTree(PathPlanningGraph graph, HexaPos start)
        {
            ExpandingNode root = new ExpandingNode(graph[0].GetNode(start));
            ExpandingTree tree = new ExpandingTree(root);

            int lastLevel = graph.planningLength - 1;

            for (int level = 0; level < lastLevel; level++)
            {
                // Children of the second-to-last level land on the last level and are never expanded.
                bool childrenAreTerminal = (level == lastLevel - 1);

                while (tree.GetNewNodeCountByLevel(level) > 0)
                {
                    // Expand the first NEW node of this level, then rescan from the start.
                    for (int i = 0; i < tree.nodeList.Count; i++)
                    {
                        ExpandingNode candidate = tree.nodeList[i];
                        if (candidate.level != level || candidate.state != ExpandingNode.STATE.NEW)
                        {
                            continue;
                        }

                        foreach (PlanningEdge edge in graph[level].GetEdges(candidate.planningNode))
                        {
                            ExpandingNode child = new ExpandingNode(edge.to);
                            tree.AddToParent(child, candidate);

                            if (childrenAreTerminal)
                            {
                                child.state = ExpandingNode.STATE.EXPANDED;
                            }
                        }

                        candidate.state = ExpandingNode.STATE.EXPANDED;
                        break;
                    }
                }
            }

            return(tree);
        }
Пример #11
0
        /// <summary>
        /// Among the nodes on <paramref name="level"/> reachable by an edge from
        /// <paramref name="currentPos"/> (located on <c>level - 1</c>), returns the index with the
        /// largest entry in <paramref name="values"/>.
        /// </summary>
        /// <param name="currentPos">Position on level <c>level - 1</c> whose edges are followed.</param>
        /// <param name="values">Values indexed by node index on <paramref name="level"/>.</param>
        /// <param name="level">Level of the candidate nodes.</param>
        /// <param name="graph">Planning graph being searched.</param>
        /// <returns>
        /// Index of the best reachable node; 0 when there is no outgoing edge or no reachable value
        /// exceeds -0.1.
        /// </returns>
        int FindMax(HexaPos currentPos, double [] values, int level, PathPlanningGraph graph)
        {
            int    bestIdx = 0;
            double bestVal = -0.1;

            PlanningNode origin = graph[level - 1].GetNode(currentPos);

            foreach (PlanningEdge edge in graph[level - 1].GetEdges(origin))
            {
                int candidateIdx = graph[level].GetIndex(edge.to);
                if (values[candidateIdx] > bestVal)
                {
                    bestIdx = candidateIdx;
                    bestVal = values[candidateIdx];
                }
            }

            return(bestIdx);
        }
 /// <summary>
 /// Sweeps from <paramref name="fromLevel"/> down to <paramref name="stopAt"/> and, for every node,
 /// raises its future reward to the largest total reward among its edge targets on the next level,
 /// then recomputes its total reward as instant + future.
 /// </summary>
 /// <param name="reward">Reward tables updated in place.</param>
 /// <param name="graph">Planning graph supplying nodes and edges.</param>
 /// <param name="fromLevel">Level at which the backward sweep starts.</param>
 /// <param name="stopAt">Last (lowest) level processed; defaults to 0.</param>
 void Backtrack(Rewards reward, PathPlanningGraph graph, int fromLevel, int stopAt = 0)
 {
     int level = fromLevel;
     while (level >= stopAt)
     {
         int count = reward.totalRewards[level].Length;
         for (int idx = 0; idx < count; idx++)
         {
             PlanningNode node = graph[level].mNodes[idx];

             // Existing future reward is only ever increased, never lowered.
             foreach (PlanningEdge edge in graph[level].GetEdges(node))
             {
                 int target = graph[level + 1].GetIndex(edge.to);
                 if (reward.totalRewards[level + 1][target] > reward.futureRewards[level][idx])
                 {
                     reward.futureRewards[level][idx] = reward.totalRewards[level + 1][target];
                 }
             }

             reward.totalRewards[level][idx] = reward.instantRewards[level][idx]
                                               + reward.futureRewards[level][idx];
         }

         level--;
     }
 }
        /// <summary>
        /// Picks the next position after the end of <paramref name="path"/>: among the edge targets
        /// of the path's last position, the one with the largest total reward on level
        /// <c>path.Length</c>.
        /// </summary>
        /// <param name="reward">Reward tables supplying <c>totalRewards</c>.</param>
        /// <param name="graph">Planning graph being searched.</param>
        /// <param name="path">Path chosen so far; must contain at least one position.</param>
        /// <returns>
        /// The best next position, or null when there is no outgoing edge or no target has a total
        /// reward above -0.01.
        /// </returns>
        HexaPos GetMax(Rewards reward, PathPlanningGraph graph, HexaPath path)
        {
            int nextLevel = path.Length;
            int tailLevel = nextLevel - 1;

            PlanningNode tailNode = graph[tailLevel].GetNode(path[tailLevel]);

            HexaPos bestPos = null;
            double  bestVal = -0.01;

            foreach (PlanningEdge edge in graph[tailLevel].GetEdges(tailNode))
            {
                int candidateIdx = graph[nextLevel].GetIndex(edge.to);
                if (reward.totalRewards[nextLevel][candidateIdx] > bestVal)
                {
                    bestPos = graph[nextLevel].mNodes[candidateIdx].pos;
                    bestVal = reward.totalRewards[nextLevel][candidateIdx];
                }
            }

            return(bestPos);
        }
        /// <summary>
        /// Recomputes the instant, future and total reward of every node on level
        /// <c>path.Length</c>. Each node is evaluated on a copy of the reward tables (estimation
        /// followed by backtracking), and only that node's three entries are copied back into
        /// <paramref name="reward"/>.
        /// </summary>
        /// <param name="reward">Reward tables updated in place at level <c>path.Length</c>.</param>
        /// <param name="entropy">Entropy map; cloned per node.</param>
        /// <param name="graph">Planning graph being searched.</param>
        /// <param name="path">Path chosen so far; its length selects the level to evaluate.</param>
        void UpdateRewardOptEst(Rewards reward, double[,] entropy, PathPlanningGraph graph, HexaPath path)
        {
            int level      = path.Length;
            int levelCount = graph.planningLength;
            int nodeCount  = graph[level].mNodes.Count;

            for (int idx = 0; idx < nodeCount; idx++)
            {
                PlanningNode candidate = graph[level].mNodes[idx];

                HexaPath singleStep = new HexaPath();
                singleStep.AddPos(candidate.pos);

                // Evaluate the candidate in isolation: private entropy copy, private reward copy.
                double[,] scratch = (double[,])entropy.Clone();
                _agent.Update(singleStep, scratch);

                Rewards trial = new Rewards(reward);
                UpdateEstimation(trial, scratch, graph, level, levelCount - 1);
                Backtrack(trial, graph, levelCount - 2, level);

                // Keep only this candidate's results from the trial tables.
                reward.instantRewards[level][idx] = trial.instantRewards[level][idx];
                reward.futureRewards[level][idx]  = trial.futureRewards[level][idx];
                reward.totalRewards[level][idx]   = trial.totalRewards[level][idx];
            }
        }
        /// <summary>
        /// Iterative back-propagation with retrack handling. Starting from per-node estimations for
        /// every level, it repeatedly sweeps from the last level down to <paramref name="level"/>,
        /// recomputing each node's future reward as the best estimated reward among its edge targets,
        /// until a full sweep changes no future reward. After each level is processed, the instant
        /// rewards of the nodes linked (via <c>_retrackLinkTable</c>) to that level's best node are
        /// adjusted. The resulting estimates for <paramref name="level"/> are printed to the console.
        /// </summary>
        /// <param name="level">Lowest level processed by each sweep and the level whose estimates are returned.</param>
        /// <param name="graph">Planning graph being searched.</param>
        /// <param name="entropy">Entropy map; a clone is used for the estimations.</param>
        /// <returns>A copy of the estimated rewards of the nodes on <paramref name="level"/>.</returns>
        double[] Backpropagation(int level, PathPlanningGraph graph, double[,] entropy)
        {
            double[,] localEntropy = (double[, ])entropy.Clone();
            int endLevel = graph.planningLength - 1;

            double[][] estimatedReward = new double[graph.planningLength][];
            double[][] futureReward    = new double[graph.planningLength][];
            double[][] instantReward   = new double[graph.planningLength][];
            int        nodeNum;
            // NOTE(review): edgeNum is assigned inside the sweep but never read.
            int        edgeNum;

            // Initialise the independent rewards: the estimation of every node on every level,
            // taken against the unmodified entropy copy. instantReward starts out identical and is
            // the working copy that the retrack step below zeroes or restores.
            double[][] independentReward = new double[graph.planningLength][];
            for (int l = 0; l < graph.planningLength; l++)
            {
                independentReward[l] = new double[graph[l].mNodes.Count];
                estimatedReward[l]   = new double[graph[l].mNodes.Count];
                futureReward[l]      = new double[graph[l].mNodes.Count];
                instantReward[l]     = new double[graph[l].mNodes.Count];
                for (int i = 0; i < graph[l].mNodes.Count; i++)
                {
                    independentReward[l][i] = GetEstimation(_agent, localEntropy, graph[l].mNodes[i].pos, _map);
                    instantReward[l][i]     = independentReward[l][i];
                }
            }

            bool stop = false;

            // Repeat full backward sweeps until one completes without changing any future reward.
            // NOTE(review): nothing visible here bounds the number of sweeps — confirm termination.
            while (stop == false)
            {
                stop = true;
                for (int l = endLevel; l >= level; l--)
                {
                    nodeNum = graph[l].mNodes.Count;
                    edgeNum = graph[l].mEdges.Count;

                    for (int i = 0; i < nodeNum; i++)
                    {
                        // NOTE(review): estRwd and insRwd are assigned but never read.
                        double estRwd = 0.0;
                        double insRwd = instantReward[l][i];
                        double futRwd = 0.0;

                        PlanningNode node = graph[l].mNodes[i];

                        // Future reward = largest estimated reward among edge targets on level l + 1
                        // (0 when there are no edges). This indexes level l + 1, so it relies on
                        // nodes of the last level having no edges — TODO confirm.
                        List <PlanningEdge>             edges = graph[l].GetEdges(node);
                        List <PlanningEdge> .Enumerator e     = edges.GetEnumerator();
                        while (e.MoveNext())
                        {
                            int j = graph[l + 1].GetIndex(e.Current.to);
                            if (estimatedReward[l + 1][j] > futRwd)
                            {
                                futRwd = estimatedReward[l + 1][j];
                            }
                        }

                        // Any change in a future reward forces another sweep.
                        if (futRwd != futureReward[l][i])
                        {
                            futureReward[l][i] = futRwd;
                            stop = false;
                        }

                        estimatedReward[l][i] = instantReward[l][i] + futureReward[l][i];
                    }

                    // Find this level's best node and feed the choice back: nodes linked to it in
                    // _retrackLinkTable get their instant reward zeroed (when i == maxIdx) or reset
                    // to the independent value (otherwise).
                    // NOTE(review): the else branch also reads _retrackLinkTable[l][maxIdx] rather
                    // than [l][i], so every i != maxIdx restores the SAME links the maxIdx branch
                    // zeroes; the zeroing only survives when maxIdx is the last index. This looks
                    // like a copy-paste slip, but changing it alters convergence — confirm intent.
                    int maxIdx = FindMax(estimatedReward[l]);
                    for (int i = 0; i < nodeNum; i++)
                    {
                        if (i == maxIdx)
                        {
                            List <RetrackLink> .Enumerator eR = _retrackLinkTable[l][maxIdx].linkList.GetEnumerator();
                            while (eR.MoveNext())
                            {
                                int nodeLevel = eR.Current.level;
                                int nodeIdx   = graph[nodeLevel].GetIndex(eR.Current.retrackNode);
                                instantReward[nodeLevel][nodeIdx] = 0;
                            }
                        }
                        else
                        {
                            List <RetrackLink> .Enumerator eR = _retrackLinkTable[l][maxIdx].linkList.GetEnumerator();
                            while (eR.MoveNext())
                            {
                                int nodeLevel = eR.Current.level;
                                int nodeIdx   = graph[nodeLevel].GetIndex(eR.Current.retrackNode);
                                instantReward[nodeLevel][nodeIdx] = independentReward[nodeLevel][nodeIdx];
                            }
                        }
                    }
                }
            }
            // Diagnostic dump of the final estimates for the requested level.
            Console.WriteLine("IBP RETRK at level " + level.ToString());
            for (int i = 0; i < estimatedReward[level].Length; i++)
            {
                int posX = graph[level].mNodes[i].pos.X;
                int posY = graph[level].mNodes[i].pos.Y;
                Console.WriteLine("Pos[" + posX.ToString() + "," + posY.ToString() + "]=" + estimatedReward[level][i].ToString());
            }
            // Return a copy so callers cannot modify the working array.
            return((double[])estimatedReward[level].Clone());
        }
        /// <summary>
        /// Plans a path from <paramref name="start"/> by growing two greedy paths in parallel — one
        /// driven by <c>UpdateRewardOptEst</c>, one by <c>UpdateRewardPesEst</c> — and then, for a
        /// bounded number of attempts, correcting the second estimate where the two paths diverge.
        /// </summary>
        /// <param name="graph">Planning graph to search.</param>
        /// <param name="start">Start position (level 0).</param>
        /// <returns>
        /// The second ("pes") path if its score is at least the first ("opt") path's score when
        /// checked at the start of an attempt; otherwise the "opt" path.
        /// </returns>
        public override HexaPath FindPath(PathPlanningGraph graph, HexaPos start)
        {
            int levelCount = graph.planningLength;

            _estimated = new Rewards(graph);
            HexaPath result = new HexaPath();
            _optEstimated = new Rewards(_estimated);
            _pesEstimated = new Rewards(_estimated);

            double[,] optEntropy = (double[,])_localEntropy.Clone();
            double[,] pesEntropy = (double[,])_localEntropy.Clone();

            HexaPath optPath = new HexaPath();
            HexaPath pesPath = new HexaPath();
            optPath.AddPos(start);
            pesPath.AddPos(start);

            // Grow both paths one level at a time, each with its own entropy copy and reward tables.
            for (int step = 1; step < levelCount; step++)
            {
                _agent.Update(optPath, optEntropy);
                UpdateRewardOptEst(_optEstimated, optEntropy, graph, optPath);
                optPath.AddPos(GetMax(_optEstimated, graph, optPath));

                _agent.Update(pesPath, pesEntropy);
                UpdateRewardPesEst(_pesEstimated, pesEntropy, graph, pesPath);
                pesPath.AddPos(GetMax(_pesEstimated, graph, pesPath));
            }

            double optScore = _agent.Score(optPath, _localEntropy);
            double pesScore = _agent.Score(pesPath, _localEntropy);

            // Attempts 0..maxAttempts inclusive, i.e. at most maxAttempts + 1 refinement rounds.
            const int maxAttempts = 10;
            for (int attempt = 0; attempt <= maxAttempts; attempt++)
            {
                if (pesScore >= optScore)
                {
                    result = pesPath;
                    break;
                }

                result = optPath;

                // Replace the "pes" estimate at the first diverging node with the score the agent
                // gives the rest of the "opt" path after replaying the shared prefix.
                int          diffFrom = pesPath.DifferentAt(optPath);
                HexaPath     optTail  = optPath.SubPath(diffFrom, optPath.Length - 1);
                PlanningNode diffNode = graph[diffFrom].GetNode(optTail[0]);
                int          diffIdx  = graph[diffFrom].GetIndex(diffNode);
                HexaPath     optHead  = optPath.SubPath(0, diffFrom - 1);

                double[,] replayEntropy = (double[,])_localEntropy.Clone();
                _agent.Update(optHead, replayEntropy);
                _pesEstimated.totalRewards[diffFrom][diffIdx] = _agent.Score(optTail, replayEntropy);

                // Re-derive a candidate from the corrected estimates.
                HexaPath candidate = new HexaPath();
                candidate.AddPos(start);
                HexaPath candidateTail = EstimatePath(graph, 1, start, _pesEstimated);
                candidate.Merge(candidateTail);

                // NOTE(review): the score is taken over the tail only (without the start position),
                // whereas the initial pesScore covers the whole path — confirm this is intended.
                double candidateScore = _agent.Score(candidateTail, _localEntropy);
                if (candidateScore <= pesScore)
                {
                    break;
                }

                pesScore = candidateScore;
                pesPath  = candidate;
            }

            return(result);
        }
Пример #17
0
        /// <summary>
        /// Estimates the best reward obtainable after <paramref name="node"/>: total scores
        /// (estimation + best downstream total) are propagated backwards from the last level to the
        /// level just after the node, and the largest total among the node's edge targets is returned.
        /// </summary>
        /// <param name="node">Planning node at the end of <paramref name="path"/>.</param>
        /// <param name="path">Path so far; its length minus one is the node's level.</param>
        /// <param name="entropy">Entropy map passed to <c>GetEstimation</c> as is.</param>
        /// <param name="graph">Planning graph being searched.</param>
        /// <returns>
        /// 0.0 when the node is on the last level or has no edge target with a positive total;
        /// otherwise the largest total score among its edge targets.
        /// </returns>
        double GetEstimatedMaxFutureReward(PlanningNode node, HexaPath path, double[,] entropy, PathPlanningGraph graph)
        {
            int lastLevel = graph.planningLength - 1;
            int level     = path.Length - 1;

            if (level == lastLevel)
            {
                return(0.0);
            }

            // Totals of the level processed just before the current one (i.e. level l + 1).
            double[] downstream = null;

            for (int l = lastLevel; l > level; l--)
            {
                int      count  = graph[l].mNodes.Count;
                double[] totals = new double[count];

                for (int i = 0; i < count; i++)
                {
                    PlanningNode candidate = graph[l].mNodes[i];
                    double       instant   = GetEstimation(_agent, entropy, candidate.pos, _map);
                    double       future    = 0.0;

                    // Nodes on the last level have nothing downstream; their future stays 0.
                    if (l < lastLevel)
                    {
                        foreach (PlanningEdge edge in graph[l].GetEdges(candidate))
                        {
                            int j = graph[l + 1].GetIndex(edge.to);
                            if (downstream[j] > future)
                            {
                                future = downstream[j];
                            }
                        }
                    }

                    totals[i] = instant + future;
                }

                downstream = totals;
            }

            // downstream now holds the totals of level + 1; take the best one reachable from node.
            double best = 0.0;
            foreach (PlanningEdge edge in graph[level].GetEdges(node))
            {
                int j = graph[level + 1].GetIndex(edge.to);
                if (downstream[j] > best)
                {
                    best = downstream[j];
                }
            }

            return(best);
        }
Пример #18
0
        /// <summary>
        /// Searches for a path from <paramref name="start"/> by repeatedly asking
        /// <c>ExpandToFindPath</c> for a candidate, scoring it, and freezing the tree with the best
        /// score so far. Also records run statistics in the instance fields
        /// (<c>scoreAtFirstRun</c>, <c>totalRunTime</c>, <c>finalMaxScore</c>,
        /// <c>hitOptimalRunTime</c>, <c>exploredSize</c>, <c>problemSize</c>) and logs progress to the console.
        /// </summary>
        /// <param name="graph">Planning graph to search.</param>
        /// <param name="start">Start position (level 0).</param>
        /// <returns>The best-scoring path found, or null if no candidate scored above 0.</returns>
        public override HexaPath FindPath(PathPlanningGraph graph, HexaPos start)
        {
            ExpandingNode root = new ExpandingNode(graph[0].GetNode(start));
            ExpandingTree tree = new ExpandingTree(root);

            List<double> scores = new List<double>();

            HexaPath bestPath      = null;
            double   bestScore     = 0.0;
            double   lastScore     = 0.0;
            bool     treeExhausted = false;
            int      iteration     = 0;

            do
            {
                HexaPath candidate = ExpandToFindPath(tree, graph, _localEntropy);

                if (candidate == null)
                {
                    // No further path: this is the last round. lastScore keeps its previous value
                    // and is still recorded below.
                    treeExhausted = true;
                }
                else
                {
                    lastScore = ScorePath(_agent, _localEntropy, candidate);
                    if (lastScore > bestScore)
                    {
                        bestScore = lastScore;
                        bestPath  = candidate;
                    }
                }

                scores.Add(lastScore);
                tree.Freeze(bestScore);

                if (iteration == 0)
                {
                    scoreAtFirstRun = lastScore;
                }

                iteration++;

                Console.WriteLine(iteration + ", " + lastScore + ", " + bestScore + ", " + tree.nodeNum);
            }
            while (iteratingOnce == false && treeExhausted == false);

            totalRunTime      = scores.Count;
            finalMaxScore     = bestScore;
            hitOptimalRunTime = FindMaxScoreIndex(scores, bestScore);

            Console.WriteLine("The number of node expanded is " + tree.nodeNum);
            exploredSize = tree.nodeNum;

            Console.WriteLine("The number of complete expanding node is " + graph.GetExpandingNodeNumber());
            problemSize = graph.GetExpandingNodeNumber();

            return(bestPath);
        }
 /// <summary>
 /// Creates a link to <paramref name="node"/> located on planning level <paramref name="lvl"/>.
 /// </summary>
 /// <param name="lvl">Level stored in <c>level</c>.</param>
 /// <param name="node">Node stored in <c>retrackNode</c>.</param>
 public RetrackLink(int lvl, PlanningNode node)
 {
     this.level       = lvl;
     this.retrackNode = node;
 }
        /// <summary>
        /// Estimates the reward obtainable after node <paramref name="currentNodeIdx"/> on
        /// <paramref name="level"/>: per-level estimated rewards are back-propagated from the last
        /// level, the greedy path through those estimates is extracted with <c>GetMaxPath</c>, and
        /// that path's score from the agent is returned.
        /// </summary>
        /// <param name="level">Level of the start node.</param>
        /// <param name="currentNodeIdx">Index of the start node within <c>graph[level].mNodes</c>.</param>
        /// <param name="graph">Planning graph being searched.</param>
        /// <param name="entropy">Entropy map; a clone is used and handed to the agent.</param>
        /// <returns>The agent's score of the greedy path following the start node.</returns>
        double Backpropagation(int level, int currentNodeIdx, PathPlanningGraph graph, double[,] entropy)
        {
            double[,] localEntropy = (double[,])entropy.Clone();

            PlanningNode startNode = graph[level].mNodes[currentNodeIdx];
            HexaPath     subpath   = new HexaPath();
            subpath.AddPos(startNode.pos);
            _agent.Update(subpath, localEntropy);

            int endLevel = graph.planningLength - 1;

            // All tables are indexed by offset = (graph level - level), so row 0 is the start level.
            int depth = endLevel - level + 1;
            double[][] estimated = new double[depth][];
            double[][] future    = new double[depth][];
            double[][] instant   = new double[depth][];

            for (int offset = 0; offset < depth; offset++)
            {
                int l     = level + offset;
                int count = graph[l].mNodes.Count;

                estimated[offset] = new double[count];
                future[offset]    = new double[count];
                instant[offset]   = new double[count];

                for (int i = 0; i < count; i++)
                {
                    PlanningNode node = graph[l].mNodes[i];
                    instant[offset][i] = GetEstimation(_agent, localEntropy, node.pos, _map);
                }
            }

            // Last level: nothing downstream, so the estimate is just the instant reward.
            int last     = endLevel - level;
            int endCount = graph[endLevel].mNodes.Count;
            for (int i = 0; i < endCount; i++)
            {
                estimated[last][i] = instant[last][i];
            }

            // Remaining levels, from second-to-last down to the start level.
            for (int offset = last - 1; offset >= 0; offset--)
            {
                int l     = level + offset;
                int count = graph[l].mNodes.Count;

                for (int i = 0; i < count; i++)
                {
                    PlanningNode node = graph[l].mNodes[i];
                    foreach (PlanningEdge edge in graph[l].GetEdges(node))
                    {
                        int j = graph[l + 1].GetIndex(edge.to);
                        if (estimated[offset + 1][j] > future[offset][i])
                        {
                            future[offset][i] = estimated[offset + 1][j];
                        }
                    }

                    estimated[offset][i] = instant[offset][i] + future[offset][i];
                }
            }

            // Score the greedy path through the estimates. (The original carried a commented-out
            // refinement loop that would repeat this until the actual score reached the expected
            // one; only the single evaluation is active.)
            HexaPath localMaxPath = GetMaxPath(level, startNode, graph, estimated);
            return(_agent.Score(localMaxPath, localEntropy));
        }