// Stores one round's decision data: the attack / defend / taunt values,
// the network snapshot that accompanied them, and the tag naming how the
// values were chosen. Simply copies each argument into its backing field.
OutputAttack(float a, float d, float t, NNState nn, string dType)
{
    this._attack = a;
    this._defend = d;
    this._taunt = t;
    this._nn = nn;
    this._decideType = dType;
}
/// <summary>
/// Produces the attack / defend / taunt values for one combat round of
/// <paramref name="fighter"/>.
/// </summary>
/// <remarks>
/// Three sources are possible:
/// <list type="bullet">
/// <item>Network ("nn"): when <c>conf.usingAttackNN</c> is set and a uniform draw falls
/// below <c>conf.showoff</c>, the sigmoid of the three network outputs is used.</item>
/// <item>Random ("r"): when the network is enabled but the draw is not below
/// <c>conf.showoff</c>, three independent draws in [0, 1] are used.</item>
/// <item>Traditional: when the network is disabled, each value is drawn between 0 and the
/// matching <c>*Trad</c> setting; the decision-type tag stays <c>null</c>.</item>
/// </list>
/// A stunned fighter cannot attack: the result is forced to pure defend when defend
/// is strictly greater than taunt, otherwise to pure taunt.
/// </remarks>
/// <param name="stunned">True when the fighter is currently stunned.</param>
/// <param name="fighter">Supplies the AI configuration and the state data fed to the network.</param>
/// <returns>A new <see cref="OutputAttack"/> carrying the values, the network snapshot and the tag.</returns>
public static OutputAttack CalculateOutput(bool stunned, Fighter fighter)
{
    // Every branch below overwrites these three values. The draws are kept so the
    // number and order of calls into UnityEngine.Random is unchanged.
    float attack = UnityEngine.Random.value;
    float defend = UnityEngine.Random.Range(0, 1 - attack);
    float taunt = 1 - (attack + defend);

    AI_Config conf = fighter.config;
    StateData state = fighter.stateData;
    float exploreRoll = UnityEngine.Random.value;
    NNState snapshot = new NNState();
    string decideType = null;

    if (!conf.usingAttackNN)
    {
        // Traditional: no network involved, ranges come straight from the config.
        attack = UnityEngine.Random.Range(0, conf.attackTrad);
        defend = UnityEngine.Random.Range(0, conf.defendTrad);
        taunt = UnityEngine.Random.Range(0, conf.tauntTrad);
    }
    else
    {
        float[] outputs, hidden;
        float[][] hiddenWeights, outputWeights;
        ConvertFightWeights(conf, state, out outputs, out hidden, out hiddenWeights, out outputWeights);

        // Hidden layer: ReLU over the weighted sum of the state inputs.
        // The "+ 0" is kept from the original; it looks like a bias placeholder.
        for (int h = 0; h < hidden.Length; h++)
        {
            hidden[h] = AI.ReLu(AI.Σ(state.attState, hiddenWeights[h]) + 0);
        }

        // Output layer: raw weighted sum of the hidden activations (squashed later).
        for (int n = 0; n < outputs.Length; n++)
        {
            outputs[n] = AI.Σ(hidden, outputWeights[n]);
        }

        // The forward pass is always recorded, even when the random values end up being used.
        snapshot = new NNState(outputs, hidden, outputWeights, hiddenWeights);

        if (exploreRoll < conf.showoff)
        {
            // Exploit: squash the raw network outputs.
            decideType = "nn";
            attack = AI.Sigmoid(outputs[0]);
            defend = AI.Sigmoid(outputs[1]);
            taunt = AI.Sigmoid(outputs[2]);
        }
        else
        {
            // Explore: ignore the network and pick uniformly.
            decideType = "r";
            attack = UnityEngine.Random.Range(0f, 1f);
            defend = UnityEngine.Random.Range(0f, 1f);
            taunt = UnityEngine.Random.Range(0f, 1f);
        }
    }

    if (stunned)
    {
        // Attack is off the table; keep whichever of defend / taunt scored higher
        // (ties go to taunt).
        bool defendWins = defend > taunt;
        attack = 0f;
        defend = defendWins ? 1f : 0f;
        taunt = defendWins ? 0f : 1f;
    }

    return new OutputAttack(attack, defend, taunt, snapshot, decideType);
}
/// <summary>
/// Resolves one combat round between this fighter and its opponent, then, for each side
/// that is network-driven, adjusts that side's attack-network weights from the round's rewards.
/// </summary>
/// <remarks>
/// Decision codes as used here: 0 = attack, 1 = defend, 2 = taunt.
/// Index 0 of <paramref name="oA"/> (and of the reward table) belongs to this fighter,
/// index 1 to the opponent.
/// </remarks>
/// <param name="time">Forwarded to <c>StepBackwardsFrom</c> when both sides defend.</param>
/// <param name="oA">The two fighters' outputs for this round.</param>
/// <param name="map">Passed to <c>Rewarder</c> and <c>StepBackwardsFrom</c>; its <c>fC</c> graph receives each side's reward.</param>
public void Battle(int time, OutputAttack[] oA, Map map)
{
    // TODO: save all of these for a replay feature.

    // Reset the ran-away state after every battle.
    _ranAway = 0;
    opp._ranAway = 0;
    DisableAllRunIcons();
    opp.DisableAllRunIcons();

    // Rewards are computed before any damage or power change is applied.
    int[][] r = Rewarder(map, oA[0].decision, oA[1].decision);
    UpdateActions(oA[0].decision);
    opp.UpdateActions(oA[1].decision);

    // Compare the two decisions and apply the outcome.
    if (oA[0].decision == 0)
    {
        // ATTACK
        if (oA[1].decision == 0)
        {
            // Attack vs attack: both take the other's full strength as damage.
            opp.TakeDmg(str);
            TakeDmg(opp.str);
        }
        else if (oA[1].decision == 1)
        {
            // Attack vs defend: the opponent takes one damage per 3 strength,
            // while this fighter takes 1 damage and is stunned.
            int shieldStr = (int)Mathf.Floor(str / 3);
            opp.TakeDmg(shieldStr);
            TakeDmg(1);
            Stun();
        }
        else if (oA[1].decision == 2)
        {
            // Attack vs taunt: the opponent takes double damage.
            int crit = str * 2;
            opp.TakeDmg(crit);
        }
    }
    else if (oA[0].decision == 1)
    {
        // DEFEND
        if (oA[1].decision == 0)
        {
            // Defend vs attack: mirror of attack vs defend above.
            int oppShieldStr = (int)Mathf.Floor(opp.str / 3);
            TakeDmg(oppShieldStr);
            _opp.TakeDmg(1);
            _opp.Stun();
        }
        else if (oA[1].decision == 1)
        {
            // Defend vs defend: each steps back from the other. Both positions are
            // captured first so the second step uses the pre-move position.
            Vector2Int s1 = v2Int;
            Vector2Int s2 = _opp.v2Int;
            StepBackwardsFrom(time, map, s2);
            _opp.StepBackwardsFrom(time, map, s1);
        }
        else if (oA[1].decision == 2)
        {
            // Defend vs taunt: the opponent powers up.
            _opp.PowerUp();
        }
    }
    else if (oA[0].decision == 2)
    {
        // TAUNT
        if (oA[1].decision == 0)
        {
            // Taunt vs attack: this fighter takes double damage.
            int oppCrit = _opp.str * 2;
            TakeDmg(oppCrit);
        }
        else if (oA[1].decision == 1)
        {
            // Taunt vs defend: this fighter powers up.
            PowerUp();
        }
        else if (oA[1].decision == 2)
        {
            // Taunt vs taunt: both power down.
            PowerDown();
            _opp.PowerDown();
        }
    }

    // Back-propagate for this fighter (reward row 0).
    if (!config.isHuman && config.usingAttackNN)
    {
        float[][] newWH;
        float[][] newWO;
        BackpropAttackWeights(oA[0].nn, stateData.attState, r[0], out newWH, out newWO);

        // Only persist the result when learning has not been toggled off for this side.
        if (GM.nnIsLearning[0])
        {
            config.UpdateAttack(newWH, newWO);
        }
    }

    // Back-propagate for the opponent (reward row 1).
    if (!opp.config.isHuman && opp.config.usingAttackNN)
    {
        float[][] newWH;
        float[][] newWO;
        BackpropAttackWeights(oA[1].nn, opp.stateData.attState, r[1], out newWH, out newWO);

        // Only persist the result when learning has not been toggled off for this side.
        if (GM.nnIsLearning[1])
        {
            opp.config.UpdateAttack(newWH, newWO);
        }
    }

    // Report the reward earned by the action each side actually took.
    for (int i = 0; i < 2; i++)
    {
        int currentReward = r[i][oA[i].decision];
        map.fC.UpdateGraph(i, currentReward);
        GM.battleAvgThisMatch[i].Add(currentReward);
    }
}

/// <summary>
/// Runs one reward-driven weight adjustment over the network snapshot <paramref name="nn"/>.
/// </summary>
/// <remarks>
/// The adjustment is written directly into the arrays exposed by <c>nn.W_hidden</c> and
/// <c>nn.W_out</c>; the out parameters hand back those same tables. They are bound before
/// the loops so that a call with nothing to iterate still returns the existing weights
/// rather than empty tables.
/// NOTE(review): the caller invokes this even when learning is toggled off, so the
/// snapshot's tables are still modified in that case. Whether that reaches the stored
/// configuration depends on whether the snapshot shares its arrays with it; confirm.
/// </remarks>
/// <param name="nn">Forward-pass snapshot: raw outputs, hidden activations and both weight tables.</param>
/// <param name="state">The input vector that was fed to the hidden layer.</param>
/// <param name="rewards">One reward per output node.</param>
/// <param name="newWH">Receives the hidden-layer weight table after the update.</param>
/// <param name="newWO">Receives the output-layer weight table after the update.</param>
private static void BackpropAttackWeights(NNState nn, float[] state, int[] rewards, out float[][] newWH, out float[][] newWO)
{
    newWH = nn.W_hidden;
    newWO = nn.W_out;

    int hiddenCount = nn.H_out.Length;
    int inputCount = state.Length;

    for (int i = 0; i < rewards.Length; i++)
    {
        int reward = rewards[i];

        // Direction of the step; the magnitude below uses the squared reward.
        float direction = Mathf.Sign(reward);

        float squashed = AI.Sigmoid(nn.O_out[i]);
        float derivF = 2 * (GM.lR * Mathf.Pow(reward, 2)) * squashed;

        // Second argument presumably selects the sigmoid derivative; confirm against AI.Sigmoid.
        float derivO = AI.Sigmoid(nn.O_out[i], true) * derivF;

        for (int j = 0; j < hiddenCount; j++)
        {
            // Read the output weight before it is adjusted at the end of this iteration.
            float derivH = newWO[i][j] * derivO;

            for (int k = 0; k < inputCount; k++)
            {
                newWH[j][k] += state[k] * derivH * direction;
            }

            newWO[i][j] += nn.H_out[j] * derivO * direction;
        }
    }
}