示例#1
0
        /// <summary>
        /// Builds a strategy (one chosen outgoing edge per vertex index below
        /// <paramref name="nStates"/>) together with the expectation computed for each vertex.
        /// </summary>
        /// <remarks>
        /// Side effect: edge weights of <paramref name="graph"/> are overwritten before any
        /// computation. Edges leaving a target and must-edges whose target index is at least
        /// <paramref name="nStates"/> get weight 0; every optional edge gets
        /// <paramref name="resetCost"/>.
        /// Graphs with more than 1000 vertices are delegated to
        /// <c>GetStaticStrategyWithDistancesToAcceptingStates</c>.
        /// </remarks>
        /// <param name="graph">Graph to analyse; its edge weights are modified.</param>
        /// <param name="sources">Passed to the large-graph fallback and to the graph's strategy clean-up.</param>
        /// <param name="targets">Vertices the strategy should reach.</param>
        /// <param name="nStates">
        /// Vertices with a smaller index receive a strategy edge. Vertices at or above this index
        /// are handed to the reduced graph as a separate vertex list (presumably the choice
        /// points - confirm against Graph).
        /// </param>
        /// <param name="resetCost">Weight assigned to every optional edge.</param>
        /// <param name="deadStates">Vertices the backward walk from the targets must not pass through.</param>
        /// <returns>
        /// The chosen edges and per-vertex expectations, or a default-constructed
        /// <c>EdgesAndExpectations</c> when <c>GetExpectations</c> returns null.
        /// </returns>
        internal static EdgesAndExpectations GetStaticStrategy(Graph graph,
                                                               int[] sources,
                                                               HSet targets,
                                                               int nStates,
                                                               int resetCost,
                                                               int [] deadStates)
        {
            // Above this size value iteration becomes too slow, so a distance-based strategy is used.
            const int MaxVerticesForValueIteration = 1000;

            // Leaving a target costs nothing.
            foreach (int t in targets)
            {
                foreach (Edge l in graph.EdgesAtVertex(t))
                {
                    l.weight = 0;
                }
            }

            // Fix the edge weights: must-edges into vertices at or beyond nStates are free,
            // optional edges cost resetCost.
            foreach (Edge l in graph.MustEdges)
            {
                if (l.target >= nStates)
                {
                    l.weight = 0;
                }
            }

            foreach (Edge l in graph.OptionalEdges)
            {
                l.weight = resetCost;
            }

            if (graph.NumberOfVertices > MaxVerticesForValueIteration)
            {
                return graph.GetStaticStrategyWithDistancesToAcceptingStates(sources, targets.ToArray(typeof(int)) as int[]);
            }

            HSet deadStatesSet = new HSet(deadStates);

            // reachableVerts[v] is true when a target can be reached from v without
            // passing through a dead state.
            bool[] reachableVerts = new bool[graph.NumberOfVertices];

            // We have to walk backwards from the targets avoiding dead states.
            graph.InitBackwardEdges();

            foreach (int t in targets)
            {
                reachableVerts[t] = true;
            }

            System.Collections.Queue queue = new System.Collections.Queue(targets);

            while (queue.Count > 0)
            {
                int current = (int)queue.Dequeue();
                foreach (int v in graph.Pred(current))
                {
                    if (!reachableVerts[v] && !deadStatesSet.Contains(v))
                    {
                        queue.Enqueue(v);
                        reachableVerts[v] = true;
                    }
                }
            }

            int numberOfReachableVerts = 0;

            foreach (bool reachable in reachableVerts)
            {
                if (reachable)
                {
                    numberOfReachableVerts++;
                }
            }

            Edge[]   strategyEdges;
            double[] expectations;

            if (numberOfReachableVerts == graph.NumberOfVertices)
            {
                // Every vertex can reach a target: work on the graph as it is.
                expectations = GetExpectations(graph, targets, nStates);

                if (expectations == null)
                {
                    return new EdgesAndExpectations();
                }

                strategyEdges = new Edge[nStates];

                for (int i = 0; i < nStates && i < graph.NumberOfVertices; i++)
                {
                    if (targets.Contains(i) || deadStatesSet.Contains(i))
                    {
                        continue;
                    }

                    // Single.MaxValue is the "nothing chosen yet" bound: an edge whose target
                    // expectation is not below it is never selected.
                    double min    = Single.MaxValue;
                    Edge   stEdge = null;

                    foreach (Edge l in graph.EdgesAtVertex(i))
                    {
                        int j = l.target;
                        if (expectations[j] < min)
                        {
                            min    = expectations[j];
                            stEdge = l;
                        }
                    }

                    strategyEdges[i] = stEdge;
                }
            }
            else
            {
                // Some vertices cannot reach a target: compute expectations on the sub-graph
                // induced by the reachable vertices, renumbered densely from 0.
                int[] graphToRG = new int[graph.NumberOfVertices];  // graph index -> reduced index
                int[] rGToGraph = new int[numberOfReachableVerts];  // reduced index -> graph index

                int count    = 0;
                int rNStates = 0;   // number of reachable vertices with index below nStates
                for (int i = 0; i < reachableVerts.Length; i++)
                {
                    if (reachableVerts[i])
                    {
                        graphToRG[i]     = count;
                        rGToGraph[count] = i;
                        count++;
                        if (i < nStates)
                        {
                            rNStates++;
                        }
                    }
                }

                // Only must-edges between reachable vertices are carried over; the reduced
                // graph is created with an empty optional-edge array.
                System.Collections.ArrayList mustEdges = new System.Collections.ArrayList();

                foreach (Edge l in graph.MustEdges)
                {
                    if (reachableVerts[l.source] && reachableVerts[l.target])
                    {
                        mustEdges.Add(new Edge(graphToRG[l.source], graphToRG[l.target], l.label, l.weight));
                    }
                }

                System.Collections.ArrayList nondVerts = new System.Collections.ArrayList();

                for (int i = nStates; i < graph.NumberOfVertices; i++)
                {
                    if (reachableVerts[i])
                    {
                        nondVerts.Add(graphToRG[i]);
                    }
                }

                Graph rGraph = new Graph(0, mustEdges.ToArray(typeof(Edge)) as Edge[], new Edge[0],
                                         nondVerts.ToArray(typeof(int)) as int[], true, WeakClosureEnum.DoNotClose);

                // Every target was marked reachable before the backward walk, so each one
                // has a valid reduced index.
                HSet rTargets = new HSet();

                foreach (int t in targets)
                {
                    rTargets.Insert(graphToRG[t]);
                }

                double[] rExpectations = GetExpectations(rGraph, rTargets, rNStates);

                if (rExpectations == null)
                {
                    return new EdgesAndExpectations();
                }

                strategyEdges = new Edge[nStates];

                // The bound on reachableVerts.Length mirrors the guard in the all-reachable
                // branch and prevents indexing past the vertex array when nStates exceeds
                // the number of vertices.
                for (int i = 0; i < nStates && i < reachableVerts.Length; i++)
                {
                    if (!reachableVerts[i] || targets.Contains(i) || deadStatesSet.Contains(i))
                    {
                        continue;
                    }

                    double min    = Single.MaxValue;
                    Edge   stEdge = null;

                    foreach (Edge l in graph.EdgesAtVertex(i))
                    {
                        int j = l.target;

                        // Edges into unreachable vertices have no reduced index and are skipped.
                        if (reachableVerts[j] && rExpectations[graphToRG[j]] < min)
                        {
                            min    = rExpectations[graphToRG[j]];
                            stEdge = l;
                        }
                    }

                    strategyEdges[i] = stEdge;
                }

                // Map the reduced expectations back to graph indices; vertices outside the
                // reduced graph keep Int32.MaxValue.
                expectations = new double[graph.NumberOfVertices];

                for (int i = 0; i < expectations.Length; i++)
                {
                    expectations[i] = Int32.MaxValue;
                }

                for (int i = 0; i < rExpectations.Length; i++)
                {
                    expectations[rGToGraph[i]] = rExpectations[i];
                }
            }

            graph.CleanTheStrategy(strategyEdges, sources);

            return new EdgesAndExpectations(strategyEdges, expectations);
        }
示例#2
0
        /// <summary>
        /// Chooses, for every vertex index below <paramref name="nStates"/> that is neither a
        /// target nor a dead state, the outgoing edge whose target has the smallest expectation,
        /// and returns those edges together with the per-vertex expectations.
        /// </summary>
        /// <remarks>
        /// The edge weights of <paramref name="graph"/> are rewritten first: edges leaving a
        /// target and must-edges whose target index is at least <paramref name="nStates"/> get
        /// weight 0, and every optional edge gets <paramref name="resetCost"/>.
        /// When the graph has more than 1000 vertices the work is delegated to
        /// <c>GetStaticStrategyWithDistancesToAcceptingStates</c>.
        /// </remarks>
        /// <param name="graph">Graph to analyse; its edge weights are modified.</param>
        /// <param name="sources">Forwarded to the large-graph fallback and to <c>CleanTheStrategy</c>.</param>
        /// <param name="targets">Vertices the strategy should reach.</param>
        /// <param name="nStates">
        /// Upper bound (exclusive) of the vertex indices that receive a strategy edge. Vertices
        /// at or above it are passed to the reduced graph as a separate list (presumably the
        /// choice points - confirm against Graph).
        /// </param>
        /// <param name="resetCost">Weight assigned to every optional edge.</param>
        /// <param name="deadStates">Vertices the backward search from the targets never enters.</param>
        /// <returns>
        /// Strategy edges and expectations, or a default-constructed
        /// <c>EdgesAndExpectations</c> when <c>GetExpectations</c> returns null.
        /// </returns>
        internal static EdgesAndExpectations GetStaticStrategy(Graph graph,
            int[] sources,
            HSet targets,
            int nStates,
            int resetCost,
            int []deadStates)
        {
            // Value iteration becomes too slow above this many vertices.
            const int MaxVerticesForValueIteration = 1000;

            // Edges leaving a target are free.
            foreach (int t in targets)
            {
                foreach (Edge l in graph.EdgesAtVertex(t))
                {
                    l.weight = 0;
                }
            }

            // Fix the edge weights.
            foreach (Edge l in graph.MustEdges)
            {
                if (l.target >= nStates)
                {
                    l.weight = 0;
                }
            }

            foreach (Edge l in graph.OptionalEdges)
            {
                l.weight = resetCost;
            }

            if (graph.NumberOfVertices > MaxVerticesForValueIteration)
            {
                return graph.GetStaticStrategyWithDistancesToAcceptingStates(sources, targets.ToArray(typeof(int)) as int[]);
            }

            HSet deadStatesSet = new HSet(deadStates);

            // Mark every vertex from which a target can be reached without entering a dead
            // state, by walking the edges backwards from the targets.
            bool[] reachableVerts = new bool[graph.NumberOfVertices];

            graph.InitBackwardEdges();

            foreach (int t in targets)
            {
                reachableVerts[t] = true;
            }

            System.Collections.Queue queue = new System.Collections.Queue(targets);

            while (queue.Count > 0)
            {
                int head = (int)queue.Dequeue();
                foreach (int v in graph.Pred(head))
                {
                    if (!reachableVerts[v] && !deadStatesSet.Contains(v))
                    {
                        queue.Enqueue(v);
                        reachableVerts[v] = true;
                    }
                }
            }

            int numberOfReachableVerts = 0;
            foreach (bool marked in reachableVerts)
            {
                if (marked)
                {
                    numberOfReachableVerts++;
                }
            }

            Edge[] strategyEdges;
            double[] expectations;

            if (numberOfReachableVerts == graph.NumberOfVertices)
            {
                // Nothing was pruned, so the expectations are computed on the graph itself.
                expectations = GetExpectations(graph, targets, nStates);

                if (expectations == null)
                {
                    return new EdgesAndExpectations();
                }

                strategyEdges = new Edge[nStates];

                for (int i = 0; i < nStates && i < graph.NumberOfVertices; i++)
                {
                    if (targets.Contains(i) || deadStatesSet.Contains(i))
                    {
                        continue;
                    }

                    // An edge is only chosen when its target expectation is below Single.MaxValue.
                    double min = Single.MaxValue;
                    Edge stEdge = null;

                    foreach (Edge l in graph.EdgesAtVertex(i))
                    {
                        int j = l.target;
                        if (expectations[j] < min)
                        {
                            min = expectations[j];
                            stEdge = l;
                        }
                    }

                    strategyEdges[i] = stEdge;
                }
            }
            else
            {
                // numberOfReachableVerts < graph.NumberOfVertices: build the sub-graph of
                // reachable vertices with dense indices and compute expectations there.
                int[] graphToRG = new int[graph.NumberOfVertices];
                // reachable graph to graph
                int[] rGToGraph = new int[numberOfReachableVerts];

                int count = 0;
                int rNStates = 0; // reachable vertices whose index is below nStates
                for (int i = 0; i < reachableVerts.Length; i++)
                {
                    if (reachableVerts[i])
                    {
                        graphToRG[i] = count;
                        rGToGraph[count] = i;
                        count++;
                        if (i < nStates)
                        {
                            rNStates++;
                        }
                    }
                }

                // Keep the must-edges whose both endpoints survived; optional edges are not
                // copied (the reduced graph receives an empty optional-edge array).
                System.Collections.ArrayList mustEdges = new System.Collections.ArrayList();

                foreach (Edge l in graph.MustEdges)
                {
                    if (reachableVerts[l.source] && reachableVerts[l.target])
                    {
                        Edge ml = new Edge(graphToRG[l.source], graphToRG[l.target], l.label, l.weight);
                        mustEdges.Add(ml);
                    }
                }

                System.Collections.ArrayList nondVerts = new System.Collections.ArrayList();

                for (int i = nStates; i < graph.NumberOfVertices; i++)
                {
                    if (reachableVerts[i])
                    {
                        nondVerts.Add(graphToRG[i]);
                    }
                }

                Graph rGraph = new Graph(0, mustEdges.ToArray(typeof(Edge)) as Edge[], new Edge[0],
                                         nondVerts.ToArray(typeof(int)) as int[], true, WeakClosureEnum.DoNotClose);

                // All targets were marked reachable up front, so each has a reduced index.
                HSet rTargets = new HSet();

                foreach (int t in targets)
                {
                    rTargets.Insert(graphToRG[t]);
                }

                double[] rExpectations = GetExpectations(rGraph, rTargets, rNStates);

                if (rExpectations == null)
                {
                    return new EdgesAndExpectations();
                }

                strategyEdges = new Edge[nStates];

                // Bounded by the vertex array as well as nStates, like the loop in the other
                // branch, so an nStates larger than the vertex count cannot index out of range.
                for (int i = 0; i < nStates && i < reachableVerts.Length; i++)
                {
                    if (!reachableVerts[i] || targets.Contains(i) || deadStatesSet.Contains(i))
                    {
                        continue;
                    }

                    double min = Single.MaxValue;
                    Edge stEdge = null;

                    foreach (Edge l in graph.EdgesAtVertex(i))
                    {
                        int j = l.target;

                        // Targets outside the reduced graph have no expectation to compare.
                        if (reachableVerts[j] && rExpectations[graphToRG[j]] < min)
                        {
                            min = rExpectations[graphToRG[j]];
                            stEdge = l;
                        }
                    }

                    strategyEdges[i] = stEdge;
                }

                // Translate back to graph indices; pruned vertices keep Int32.MaxValue.
                expectations = new double[graph.NumberOfVertices];

                for (int i = 0; i < expectations.Length; i++)
                {
                    expectations[i] = Int32.MaxValue;
                }

                for (int i = 0; i < rExpectations.Length; i++)
                {
                    expectations[rGToGraph[i]] = rExpectations[i];
                }
            }

            graph.CleanTheStrategy(strategyEdges, sources);

            return new EdgesAndExpectations(strategyEdges, expectations);
        }
示例#3
0
        /*
         * From "Play to test"
         * Value iteration is the most widely used algorithm for solving discounted Markov decision
         * problems (see e.g. [21]). Reachability games give rise to non-discounted Markov
         * decision problems. Nevertheless the value iteration algorithm applies; this is a practical
         * approach for computing strategies for transient test graphs. Test graphs, modified by inserting
         * a zero-cost edge (0; 0), correspond to a subclass of negative stationary Markov
         * decision processes (MDPs) with an infinite horizon, where rewards are negative and
         * thus regarded as costs, strategies are stationary, i.e. time independent, and there is no
         * finite upper bound on the number of steps in the process. The optimization criterion
         * for our strategies corresponds to the expected total reward criterion, rather than the
         * expected discounted reward criterion used in discounted Markov decision problems.
         * Let G = (V;E; V a; V p; g; p; c) be a test graph modified by inserting a zero-cost
         * edge (0; 0). The classical value iteration algorithm works as follows on G.
         *
         * Value iteration Let n = 0 and let M0 be the zero vector with coordinates V so that
         * every M0[u] = 0. Given n and Mn, we compute Mn+1 (and then increment n):
         * Mn+1[u] = min { c(u,v) + Mn[v] : (u,v) in E }             if u is an active state,
         * Mn+1[u] = sum { p(u,v) * (c(u,v) + Mn[v]) : (u,v) in E }  if u is a choice point.
         *
         * Value iteration for negative MDPs with the expected total reward criterion, or negative
         * Markov decision problems for short, does not in general converge to an optimal
         * solution, even if one exists. However, if there exists a strategy for which the expected
         * cost is finite for all states [21, Assumption 7.3.1], then value iteration does converge for
         * negative Markov decision problems [21, Theorem 7.3.10]. In light of lemmas 2 and 3,
         * this implies that value iteration converges for transient test graphs. Let us make this
         * more precise, as a corollary of Theorem 7.3.10 in [21].
         */

        //nStates marks the end of active states, choice points start after that
        /// <summary>
        /// Iteratively computes a value for every vertex of <paramref name="graph"/> until the
        /// largest per-vertex increase in one pass is at most 1.0E-6.
        /// </summary>
        /// <param name="graph">Graph whose edge probabilities are initialised and whose edges are read.</param>
        /// <param name="targets">Vertices that are skipped by every pass and therefore keep the value 0.</param>
        /// <param name="nStates">
        /// Vertices below this index take the minimum over their outgoing edges; vertices at or
        /// above it take the probability-weighted sum.
        /// </param>
        /// <returns>
        /// The value array of the last pass, or null when the increase is still above the
        /// tolerance after 10000 passes.
        /// </returns>
        static double[] ValueIteration(Graph graph, HSet targets, int nStates)
        {
            const double Tolerance  = 1.0E-6;
            const int    PassLimit  = 10000;

            graph.InitEdgeProbabilities();

            int vertexCount = graph.NumberOfVertices;

            // Two buffers: "current" holds the previous pass, "next" receives the new one.
            double[] current = new double[vertexCount];
            double[] next    = new double[vertexCount];

            // End of the range treated with the minimum rule, clamped to the vertex count.
            int minRuleEnd = Math.Min(nStates, vertexCount);

            double largestIncrease = 0;

            for (int pass = 0; pass < PassLimit; pass++)
            {
                largestIncrease = 0;

                // Minimum rule: cheapest "edge weight + value of edge target".
                for (int u = 0; u < minRuleEnd; u++)
                {
                    if (!targets.Contains(u))
                    {
                        double cheapest = Double.MaxValue;

                        foreach (Edge e in graph.EdgesAtVertex(u))
                        {
                            double candidate = ((double)e.weight) + current[e.target];
                            if (candidate < cheapest)
                            {
                                cheapest = candidate;
                            }
                        }

                        // A vertex without outgoing edges leaves its entry untouched.
                        if (cheapest != Double.MaxValue)
                        {
                            next[u] = cheapest;

                            double increase = cheapest - current[u];
                            if (largestIncrease < increase)
                            {
                                largestIncrease = increase;
                            }
                        }
                    }
                }

                // Sum rule: probability-weighted "edge weight + value of edge target".
                for (int u = nStates; u < vertexCount; u++)
                {
                    if (!targets.Contains(u))
                    {
                        double weighted = 0;

                        foreach (Edge e in graph.EdgesAtVertex(u))
                        {
                            weighted += graph.EdgeProbability(e) * (((double)e.weight) + current[e.target]);
                        }

                        next[u] = weighted;

                        double increase = weighted - current[u];
                        if (largestIncrease < increase)
                        {
                            largestIncrease = increase;
                        }
                    }
                }

                // The freshly written buffer becomes the current one.
                double[] swap = current;
                current = next;
                next    = swap;

                if (largestIncrease <= Tolerance)
                {
                    break;
                }
            }

            // Still moving after the last allowed pass: the result is erroneous.
            return largestIncrease > Tolerance ? null : current;
        }
示例#4
0
        //ValueIteration(Graph graph,int[] sources,HSet targets,int nStates)
        /*
          From "Play to test"
          Value iteration is the most widely used algorithm for solving discounted Markov decision
          problems (see e.g. [21]). Reachability games give rise to non-discounted Markov
          decision problems. Nevertheless the value iteration algorithm applies; this is a practical
          approach for computing strategies for transient test graphs. Test graphs, modified by inserting
          a zero-cost edge (0; 0), correspond to a subclass of negative stationary Markov
          decision processes (MDPs) with an infinite horizon, where rewards are negative and
          thus regarded as costs, strategies are stationary, i.e. time independent, and there is no
          finite upper bound on the number of steps in the process. The optimization criterion
          for our strategies corresponds to the expected total reward criterion, rather than the
          expected discounted reward criterion used in discounted Markov decision problems.
          Let G = (V;E; V a; V p; g; p; c) be a test graph modified by inserting a zero-cost
          edge (0; 0). The classical value iteration algorithm works as follows on G.

          Value iteration Let n = 0 and let M0 be the zero vector with coordinates V so that
          every M0[u] = 0. Given n and Mn, we compute Mn+1 (and then increment n):
          Mn+1[u] = min { c(u,v) + Mn[v] : (u,v) in E }             if u is an active state,
          Mn+1[u] = sum { p(u,v) * (c(u,v) + Mn[v]) : (u,v) in E }  if u is a choice point.

          Value iteration for negative MDPs with the expected total reward criterion, or negative
          Markov decision problems for short, does not in general converge to an optimal
          solution, even if one exists. However, if there exists a strategy for which the expected
          cost is finite for all states [21, Assumption 7.3.1], then value iteration does converge for
          negative Markov decision problems [21, Theorem 7.3.10]. In light of lemmas 2 and 3,
          this implies that value iteration converges for transient test graphs. Let us make this
          more precise, as a corollary of Theorem 7.3.10 in [21].
        */
        //nStates marks the end of active states, choice points start after that
        /// <summary>
        /// Repeats value-update rounds over <paramref name="graph"/> until no vertex value grows
        /// by more than 1.0E-6 in a round, or 10000 rounds have been run.
        /// </summary>
        /// <param name="graph">Graph whose edge probabilities are initialised and whose edges are read.</param>
        /// <param name="targets">Vertices that are never updated, so their value stays 0.</param>
        /// <param name="nStates">
        /// Split point: vertices with a smaller index use the minimum over outgoing edges, the
        /// remaining vertices use the probability-weighted sum.
        /// </param>
        /// <returns>The values after the final round, or null if the last round still exceeded the tolerance.</returns>
        static double[] ValueIteration(Graph graph, HSet targets, int nStates)
        {
            graph.InitEdgeProbabilities();

            double[] previous = new double[graph.NumberOfVertices];
            double[] updated  = new double[graph.NumberOfVertices];

            const double eps       = 1.0E-6;
            const int    maxRounds = 10000;

            double change;
            int    rounds = 0;

            while (true)
            {
                change = 0;

                // Vertices below nStates: take the cheapest outgoing edge.
                for (int s = 0; s < nStates && s < graph.NumberOfVertices; s++)
                {
                    if (targets.Contains(s))
                    {
                        continue;
                    }

                    double lowest = Double.MaxValue;

                    foreach (Edge edge in graph.EdgesAtVertex(s))
                    {
                        double total = ((double)edge.weight) + previous[edge.target];
                        lowest = total < lowest ? total : lowest;
                    }

                    // No outgoing edge was seen: leave this entry as it is.
                    if (lowest == Double.MaxValue)
                    {
                        continue;
                    }

                    updated[s] = lowest;

                    double growth = lowest - previous[s];
                    if (growth > change)
                    {
                        change = growth;
                    }
                }

                // Vertices from nStates on: weight each outgoing edge by its probability.
                for (int s = nStates; s < graph.NumberOfVertices; s++)
                {
                    if (targets.Contains(s))
                    {
                        continue;
                    }

                    double sum = 0;

                    foreach (Edge edge in graph.EdgesAtVertex(s))
                    {
                        sum += graph.EdgeProbability(edge) * (((double)edge.weight) + previous[edge.target]);
                    }

                    updated[s] = sum;

                    double growth = sum - previous[s];
                    if (growth > change)
                    {
                        change = growth;
                    }
                }

                rounds++;

                // Exchange the buffers so "previous" holds the round just computed.
                double[] spare = previous;
                previous = updated;
                updated  = spare;

                bool keepGoing = change > eps && rounds < maxRounds;
                if (!keepGoing)
                {
                    break;
                }
            }

            if (change > eps)
            {
                return null; // the result is erroneous
            }

            return previous;
        }