/// <summary>
/// Performs a single-step Q-value update on one entry of a state's Q-table row.
/// </summary>
/// <param name="InQ">Row of Q-table entries (one <c>Qclass</c> per action) for the current state.</param>
/// <param name="index">Index of the action whose entry is updated.</param>
/// <param name="V">Largest Q-value of the next state.</param>
/// <param name="R">Reward value.</param>
/// <remarks>
/// The learning rate is 1 / (1 + visit count), so it shrinks as the entry is visited more often.
/// The new value is (1 - rate) * oldQ + rate * (R + discount * V), and the visit count is
/// incremented by one. The entry is replaced by a new <c>Qclass</c> instance.
/// </remarks>
public static void UpdateQtable(List<Qclass> InQ, int index, double V, double R)
{
    Qclass previous = InQ[index];
    double learnrate = 1.0 / (double)(1 + previous.visttime);

    Qclass updated = new Qclass();
    updated.Qvalue = (1.0 - learnrate) * previous.Qvalue + learnrate * (R + discount * V);
    updated.visttime = previous.visttime + 1;
    // Carry the flag over: the entry is replaced wholesale, so without this the flag
    // assigned when the table was initialised would be lost after the first update.
    updated.flag = previous.flag;

    InQ[index] = updated;
}
/// <summary>
/// Appends one row per state index to <paramref name="Qtable"/>, each row holding one
/// <c>Qclass</c> entry per action index.
/// </summary>
/// <param name="Qtable">Table that receives the rows; rows already present are left untouched.</param>
/// <remarks>
/// Each entry starts with a visit count of 0, a flag of 1 or 0 taken from
/// <c>boolOfQvalue(sortedState, action, 4)</c>, and an initial Q-value of
/// 1 / (1 + sum of |state[k] - action[l]| over all pairs), so a smaller total difference
/// between state and action values yields a larger starting value.
/// </remarks>
public void InitialQTable(List<List<Qclass>> Qtable)
{
    // The bounds are loop-invariant, so compute them once. They stay doubles so the
    // comparison behaves exactly like comparing against Math.Pow directly.
    double stateCount = Math.Pow(OStateLength, 4.0);
    double actionCount = Math.Pow(OActionLength, 4.0);

    for (int i = 0; i < stateCount; i++)
    {
        List<Qclass> InQtable = new List<Qclass>();
        int[] osArray = IndexToOstate(i);
        int[] stArray = sortArray(osArray, 4);

        for (int j = 0; j < actionCount; j++)
        {
            int[] actArray = IndexToAction(j);

            Qclass t = new Qclass();
            t.visttime = 0;
            if (boolOfQvalue(stArray, actArray, 4))
            {
                t.flag = 1;
            }
            else
            {
                t.flag = 0;
            }

            // Accumulate in double: with an int accumulator, 1 / distance is integer
            // division and collapses to 0 for every distance greater than 1.
            // The state is compared with the action (not with its own sorted copy), so
            // the initial value actually depends on the action.
            double distance = 1;
            for (int k = 0; k < osArray.Length; k++)
            {
                for (int l = 0; l < actArray.Length; l++)
                {
                    distance += Math.Abs(osArray[k] - actArray[l]);
                }
            }
            t.Qvalue = 1 / distance;

            InQtable.Add(t);
        }

        Qtable.Add(InQtable);
    }
}
/// <summary>
/// Entry point: builds the Q-table, starts the listener thread and then runs the
/// Q-learning control loop for a fixed number of iterations.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
/// <remarks>
/// Each iteration maps the current queue lengths to a state index, picks an action index
/// from the values returned by <c>QvalueToP</c>, sends the action through <c>newClient</c>,
/// waits on <c>Manuevent</c> for the next queue lengths, and updates the Q-table entry.
/// NOTE(review): <c>bytefour</c> is presumably filled by the <c>RunServer</c> thread before
/// it signals <c>Manuevent</c> - confirm against <c>RunServer</c>.
/// </remarks>
static void Main(string[] args)
{
    List<OState[]> State = new List<OState[]>();            // every combination of four queue states
    List<int[]> Action = new List<int[]>();                 // every combination of four action values
    List<List<Qclass>> Qtable = new List<List<Qclass>>();   // Q-table: one row per state, one entry per action

    Console.WriteLine("The message below is:");
    Manuevent.Reset();

    // Enumerate every 4-tuple of states.
    // NOTE(review): State and Action are filled here but not read again inside Main.
    for (int i = 0; i < OStateLength; i++)
    {
        for (int j = 0; j < OStateLength; j++)
        {
            for (int m = 0; m < OStateLength; m++)
            {
                for (int n = 0; n < OStateLength; n++)
                {
                    OState[] S = { (OState)(i), (OState)(j), (OState)(m), (OState)(n) };
                    State.Add(S);
                }
            }
        }
    }

    // Enumerate every 4-tuple of action values.
    for (int i = 0; i < OActionLength; i++)
    {
        for (int j = 0; j < OActionLength; j++)
        {
            for (int m = 0; m < OActionLength; m++)
            {
                for (int n = 0; n < OActionLength; n++)
                {
                    int[] A = new int[4] { OAction[i], OAction[j], OAction[m], OAction[n] };
                    Action.Add(A);
                }
            }
        }
    }

    // Initialise the Q-table. The bounds are loop-invariant, so compute them once; they
    // stay doubles so the comparison matches comparing against Math.Pow directly.
    double stateCount = Math.Pow(OStateLength, 4.0);
    double actionCount = Math.Pow(OActionLength, 4.0);
    for (int i = 0; i < stateCount; i++)
    {
        List<Qclass> InQtable = new List<Qclass>();
        int[] osArray = IndexToOstate(i);
        int[] stArray = sortArray(osArray, 4);

        for (int j = 0; j < actionCount; j++)
        {
            int[] actArray = IndexToAction(j);

            Qclass t = new Qclass();
            t.visttime = 0;
            if (boolOfQvalue(stArray, actArray, 4))
            {
                t.flag = 1;
            }
            else
            {
                t.flag = 0;
            }

            // Initial Q-value: 1 / (1 + sum of |state[k] - action[l]| over all pairs).
            double temp = 1;
            for (int k = 0; k < osArray.Length; k++)
            {
                for (int l = 0; l < actArray.Length; l++)
                {
                    temp += Math.Abs(osArray[k] - actArray[l]);
                }
            }
            t.Qvalue = 1 / temp;

            InQtable.Add(t);
        }

        Qtable.Add(InQtable);
    }

    Program.ListenThread = new Thread(RunServer);
    ListenThread.Start();

    // Wait for the first set of queue lengths and copy it out of the shared buffer.
    int[] NextIntArr = new int[4];
    Manuevent.WaitOne();
    Manuevent.Reset();
    int k_1 = 0;
    foreach (byte j in bytefour)
    {
        NextIntArr[k_1] = (int)j;
        k_1++;
    }
    MyWrite(bytefour);

    Random ra = new Random();
    int count = 10000; // number of learning iterations

    for (int iteration = 0; iteration < count; iteration++)
    {
        // Snapshot the current queue lengths. NextIntArr is overwritten in place further
        // down, so sharing the reference would make Reward() compare the new reading
        // with itself instead of with the previous one.
        int[] IntArr = (int[])NextIntArr.Clone();

        // Convert the queue lengths to a state and look up its index.
        OState[] CurrentState = IntArrToOStableArr(IntArr, 4);
        int MyIndexOfS = IndexOfState(CurrentState);

        // Pick the action index: draw a number below the last element of QP and let
        // IndexOfAction map it to an action.
        List<int> QP = QvalueToP(Qtable[MyIndexOfS]);
        int MyIndexOfA = IndexOfAction(ra.Next(0, QP[QP.Count - 1]), QP);
        int[] CurrntAction = IndexToAction(MyIndexOfA);

        // Print the chosen action values.
        Console.Write("输出一组配时:");
        foreach (int ss in CurrntAction)
        {
            Console.Write("{0} ", ss);
        }
        Console.WriteLine("\n");

        // Send the action to the client through the shared byte buffer.
        int k_2 = 0;
        foreach (int ca in CurrntAction)
        {
            bytefour[k_2] = (byte)ca;
            k_2++;
        }
        newClient.Send(bytefour);

        // Wait for the queue lengths that result from the action just sent.
        Console.WriteLine("等待下组排队长度!");
        Manuevent.WaitOne();
        Manuevent.Reset();
        int k_3 = 0;
        foreach (byte j in bytefour)
        {
            NextIntArr[k_3] = (int)j;
            k_3++;
        }
        MyWrite(bytefour);

        OState[] NextState = IntArrToOStableArr(NextIntArr, 4);  // queue lengths -> state
        int MyIndexOfNextS = IndexOfState(NextState);            // index of the next state
        double V = MaxQOfS(Qtable[MyIndexOfNextS]);              // largest Q-value of the next state
        double R = Reward(IntArr, NextIntArr, 4);                // reward from previous vs. new queue lengths

        // Update the Q-value of the (state, action) pair that was just used.
        UpdateQtable(Qtable[MyIndexOfS], MyIndexOfA, V, R);
    }
}
/// <summary>
/// Appends one row per state index to <paramref name="Qtable"/>, each row holding one
/// <c>Qclass</c> entry per action index.
/// </summary>
/// <param name="Qtable">Table that receives the rows; rows already present are left untouched.</param>
/// <remarks>
/// Each entry starts with a visit count of 0, a flag of 1 or 0 taken from
/// <c>boolOfQvalue(sortedState, action, 4)</c>, and an initial Q-value of
/// 1 / (1 + sum of |state[k] - action[l]| over all pairs), so a smaller total difference
/// between state and action values yields a larger starting value.
/// </remarks>
public void InitialQTable(List<List<Qclass>> Qtable)
{
    // The bounds are loop-invariant, so compute them once. They stay doubles so the
    // comparison behaves exactly like comparing against Math.Pow directly.
    double stateCount = Math.Pow(OStateLength, 4.0);
    double actionCount = Math.Pow(OActionLength, 4.0);

    for (int i = 0; i < stateCount; i++)
    {
        List<Qclass> InQtable = new List<Qclass>();
        int[] osArray = IndexToOstate(i);
        int[] stArray = sortArray(osArray, 4);

        for (int j = 0; j < actionCount; j++)
        {
            int[] actArray = IndexToAction(j);

            Qclass t = new Qclass();
            t.visttime = 0;
            if (boolOfQvalue(stArray, actArray, 4))
            {
                t.flag = 1;
            }
            else
            {
                t.flag = 0;
            }

            // Accumulate in double: with an int accumulator, 1 / distance is integer
            // division and collapses to 0 for every distance greater than 1.
            // The state is compared with the action (not with its own sorted copy), so
            // the initial value actually depends on the action.
            double distance = 1;
            for (int k = 0; k < osArray.Length; k++)
            {
                for (int l = 0; l < actArray.Length; l++)
                {
                    distance += Math.Abs(osArray[k] - actArray[l]);
                }
            }
            t.Qvalue = 1 / distance;

            InQtable.Add(t);
        }

        Qtable.Add(InQtable);
    }
}
/// <summary>
/// Performs a single-step Q-value update on one entry of a state's Q-table row.
/// </summary>
/// <param name="InQ">Row of Q-table entries (one <c>Qclass</c> per action) for the current state.</param>
/// <param name="index">Index of the action whose entry is updated.</param>
/// <param name="V">Largest Q-value of the next state.</param>
/// <param name="R">Reward value.</param>
/// <remarks>
/// The learning rate is 1 / (1 + visit count), so it shrinks as the entry is visited more often.
/// The new value is (1 - rate) * oldQ + rate * (R + discount * V), and the visit count is
/// incremented by one. The entry is replaced by a new <c>Qclass</c> instance.
/// </remarks>
public static void UpdateQtable(List<Qclass> InQ, int index, double V, double R)
{
    Qclass previous = InQ[index];
    double learnrate = 1.0 / (double)(1 + previous.visttime);

    Qclass updated = new Qclass();
    updated.Qvalue = (1.0 - learnrate) * previous.Qvalue + learnrate * (R + discount * V);
    updated.visttime = previous.visttime + 1;
    // Carry the flag over: the entry is replaced wholesale, so without this the flag
    // assigned when the table was initialised would be lost after the first update.
    updated.flag = previous.flag;

    InQ[index] = updated;
}
/// <summary>
/// Entry point: builds the Q-table, starts the listener thread and then runs the
/// Q-learning control loop for a fixed number of iterations.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
/// <remarks>
/// Each iteration maps the current queue lengths to a state index, picks an action index
/// from the values returned by <c>QvalueToP</c>, sends the action through <c>newClient</c>,
/// waits on <c>Manuevent</c> for the next queue lengths, and updates the Q-table entry.
/// NOTE(review): <c>bytefour</c> is presumably filled by the <c>RunServer</c> thread before
/// it signals <c>Manuevent</c> - confirm against <c>RunServer</c>.
/// </remarks>
static void Main(string[] args)
{
    List<OState[]> State = new List<OState[]>();            // every combination of four queue states
    List<int[]> Action = new List<int[]>();                 // every combination of four action values
    List<List<Qclass>> Qtable = new List<List<Qclass>>();   // Q-table: one row per state, one entry per action

    Console.WriteLine("The message below is:");
    Manuevent.Reset();

    // Enumerate every 4-tuple of states.
    // NOTE(review): State and Action are filled here but not read again inside Main.
    for (int i = 0; i < OStateLength; i++)
    {
        for (int j = 0; j < OStateLength; j++)
        {
            for (int m = 0; m < OStateLength; m++)
            {
                for (int n = 0; n < OStateLength; n++)
                {
                    OState[] S = { (OState)(i), (OState)(j), (OState)(m), (OState)(n) };
                    State.Add(S);
                }
            }
        }
    }

    // Enumerate every 4-tuple of action values.
    for (int i = 0; i < OActionLength; i++)
    {
        for (int j = 0; j < OActionLength; j++)
        {
            for (int m = 0; m < OActionLength; m++)
            {
                for (int n = 0; n < OActionLength; n++)
                {
                    int[] A = new int[4] { OAction[i], OAction[j], OAction[m], OAction[n] };
                    Action.Add(A);
                }
            }
        }
    }

    // Initialise the Q-table. The bounds are loop-invariant, so compute them once; they
    // stay doubles so the comparison matches comparing against Math.Pow directly.
    double stateCount = Math.Pow(OStateLength, 4.0);
    double actionCount = Math.Pow(OActionLength, 4.0);
    for (int i = 0; i < stateCount; i++)
    {
        List<Qclass> InQtable = new List<Qclass>();
        int[] osArray = IndexToOstate(i);
        int[] stArray = sortArray(osArray, 4);

        for (int j = 0; j < actionCount; j++)
        {
            int[] actArray = IndexToAction(j);

            Qclass t = new Qclass();
            t.visttime = 0;
            if (boolOfQvalue(stArray, actArray, 4))
            {
                t.flag = 1;
            }
            else
            {
                t.flag = 0;
            }

            // Initial Q-value: 1 / (1 + sum of |state[k] - action[l]| over all pairs).
            double temp = 1;
            for (int k = 0; k < osArray.Length; k++)
            {
                for (int l = 0; l < actArray.Length; l++)
                {
                    temp += Math.Abs(osArray[k] - actArray[l]);
                }
            }
            t.Qvalue = 1 / temp;

            InQtable.Add(t);
        }

        Qtable.Add(InQtable);
    }

    Program.ListenThread = new Thread(RunServer);
    ListenThread.Start();

    // Wait for the first set of queue lengths and copy it out of the shared buffer.
    int[] NextIntArr = new int[4];
    Manuevent.WaitOne();
    Manuevent.Reset();
    int k_1 = 0;
    foreach (byte j in bytefour)
    {
        NextIntArr[k_1] = (int)j;
        k_1++;
    }
    MyWrite(bytefour);

    Random ra = new Random();
    int count = 10000; // number of learning iterations

    for (int iteration = 0; iteration < count; iteration++)
    {
        // Snapshot the current queue lengths. NextIntArr is overwritten in place further
        // down, so sharing the reference would make Reward() compare the new reading
        // with itself instead of with the previous one.
        int[] IntArr = (int[])NextIntArr.Clone();

        // Convert the queue lengths to a state and look up its index.
        OState[] CurrentState = IntArrToOStableArr(IntArr, 4);
        int MyIndexOfS = IndexOfState(CurrentState);

        // Pick the action index: draw a number below the last element of QP and let
        // IndexOfAction map it to an action.
        List<int> QP = QvalueToP(Qtable[MyIndexOfS]);
        int MyIndexOfA = IndexOfAction(ra.Next(0, QP[QP.Count - 1]), QP);
        int[] CurrntAction = IndexToAction(MyIndexOfA);

        // Print the chosen action values.
        Console.Write("输出一组配时:");
        foreach (int ss in CurrntAction)
        {
            Console.Write("{0} ", ss);
        }
        Console.WriteLine("\n");

        // Send the action to the client through the shared byte buffer.
        int k_2 = 0;
        foreach (int ca in CurrntAction)
        {
            bytefour[k_2] = (byte)ca;
            k_2++;
        }
        newClient.Send(bytefour);

        // Wait for the queue lengths that result from the action just sent.
        Console.WriteLine("等待下组排队长度!");
        Manuevent.WaitOne();
        Manuevent.Reset();
        int k_3 = 0;
        foreach (byte j in bytefour)
        {
            NextIntArr[k_3] = (int)j;
            k_3++;
        }
        MyWrite(bytefour);

        OState[] NextState = IntArrToOStableArr(NextIntArr, 4);  // queue lengths -> state
        int MyIndexOfNextS = IndexOfState(NextState);            // index of the next state
        double V = MaxQOfS(Qtable[MyIndexOfNextS]);              // largest Q-value of the next state
        double R = Reward(IntArr, NextIntArr, 4);                // reward from previous vs. new queue lengths

        // Update the Q-value of the (state, action) pair that was just used.
        UpdateQtable(Qtable[MyIndexOfS], MyIndexOfA, V, R);
    }
}