// Replay-based Q-learning update: walk the replay memory backwards so the
// final reward propagates to earlier states via the Bellman equation, retrain
// the network on the corrected Q-values, then clear the memory.
private void TrainANN()
{
    for (int i = replayMemory.Count - 1; i >= 0; i--)
    {
        // Q-values the network currently predicts for this remembered state.
        // (The original allocated throwaway lists here that were immediately
        // overwritten; they have been removed.)
        List<double> toutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));
        double maxQOld = toutputsOld.Max();
        // toutputsOld is already a List<double>, so IndexOf works directly —
        // the original's ToList() only made a needless copy.
        int action = toutputsOld.IndexOf(maxQOld);

        double feedback;
        if (i == replayMemory.Count - 1)
        {
            // Last memory: no successor state, so the raw reward is the target.
            // NOTE(review): sibling implementations also treat reward == -1 as
            // terminal mid-list; confirm whether that check is wanted here too.
            feedback = replayMemory[i].reward;
        }
        else
        {
            // Bellman equation: reward + discounted best Q of the next state.
            List<double> toutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
            double maxQ = toutputsNew.Max();
            feedback = replayMemory[i].reward + discount * maxQ;
        }

        // Overwrite the chosen action's Q-value with the target and retrain.
        toutputsOld[action] = feedback;
        ann.Train(replayMemory[i].states, toutputsOld);
    }
    replayMemory.Clear();
}
// Runs the network on the current game state: trains when `train` is true,
// otherwise only calculates the output without updating the weights.
// bx/by  - ball position, bvx/bvy - ball velocity,
// px/py  - paddle position, pv - desired paddle velocity (training target).
List <double> Run(double bx, double by, double bvx, double bvy, double px, double py, double pv, bool train)
{
    // Collection initializers instead of the original's repeated Add calls.
    List<double> inputs = new List<double> { bx, by, bvx, bvy, px, py };
    // pv goes into the outputs list so it can serve as the training target;
    // CalcOutput ignores it.
    List<double> outputs = new List<double> { pv };
    if (train)
    {
        return ann.Train(inputs, outputs);
    }
    else
    {
        return ann.CalcOutput(inputs, outputs);
    }
}
// Feeds the current ball/paddle state to the network. When `train` is true the
// weights are updated toward paddleVelocity; otherwise the output is computed
// without changing the weights.
private List <double> Run(
    double ballXPosition,
    double ballYPosition,
    double ballXVelocity,
    double ballYVelocity,
    double paddleXPosition,
    double paddleYPosition,
    double paddleVelocity,
    bool train)
{
    List<double> networkInputs = new List<double>();
    networkInputs.Add(ballXPosition);
    networkInputs.Add(ballYPosition);
    networkInputs.Add(ballXVelocity);
    networkInputs.Add(ballYVelocity);
    networkInputs.Add(paddleXPosition);
    networkInputs.Add(paddleYPosition);

    List<double> desiredOutputs = new List<double>();
    desiredOutputs.Add(paddleVelocity);

    return train
        ? _ann.Train(networkInputs, desiredOutputs)
        : _ann.CalcOutput(networkInputs, desiredOutputs);
}
// Wrapper around the network: trains on the sample when `train` is set,
// otherwise just calculates the output for the given state.
List <double> Run(double bx, double by, double bvx, double bvy, double px, double py, double pv, bool train)
{
    List<double> stateInputs = new List<double> { bx, by, bvx, bvy, px, py };
    List<double> targetOutputs = new List<double> { pv };
    return train
        ? ann.Train(stateInputs, targetOutputs)
        : ann.CalculateOutput(stateInputs, targetOutputs);
}
// Q-learning itself: replay the stored memories backwards so the terminal
// reward propagates to earlier states via the Bellman equation.
private void TrainFromMemories()
{
    for (int i = replayMemory.Count - 1; i >= 0; i--)
    {
        // Q-values the network currently assigns to this remembered state.
        List<double> outputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));
        double maxQOld = outputsOld.Max();
        // outputsOld is already a List<double>, so IndexOf can be called
        // directly — the original's ToList() made a needless copy.
        int action = outputsOld.IndexOf(maxQOld);

        double feedback;
        if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
        {
            // Terminal memory (last entry, or the failure reward):
            // the raw reward is the training target.
            feedback = replayMemory[i].reward;
        }
        else
        {
            List<double> outputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
            double maxQ = outputsNew.Max();
            // Bellman's equation
            feedback = replayMemory[i].reward + discount * maxQ;
        }

        // Replace the best action's Q-value with the corrected target and retrain.
        outputsOld[action] = feedback;
        ann.Train(replayMemory[i].states, outputsOld);
    }
}
// Coroutine: trains the network from "trainingData.txt" for `epochs` passes,
// rolling weights back and adapting the learning rate after each epoch based
// on the sum of squared errors.
IEnumerator LoadTrainingSet()
{
    string path = Application.dataPath + "/trainingData.txt";
    string line;
    if (File.Exists(path))
    {
        int lineCount = File.ReadAllLines(path).Length;
        // Dispose the reader when training completes (the original leaked it).
        using (StreamReader tdf = File.OpenText(path))
        {
            List<double> calcOutputs = new List<double>();
            List<double> inputs = new List<double>();
            List<double> outputs = new List<double>();
            for (int i = 0; i < epochs; i++)
            {
                sse = 0;
                // Rewind for the next epoch. DiscardBufferedData is required:
                // moving BaseStream.Position alone leaves stale bytes in the
                // StreamReader's internal buffer, so epochs after the first
                // would read corrupted lines.
                tdf.BaseStream.Position = 0;
                tdf.DiscardBufferedData();
                string currentWeights = ann.PrintWeights();
                while ((line = tdf.ReadLine()) != null)
                {
                    string[] data = line.Split(',');
                    float thisError = 0;
                    // Skip rows where no user input was recorded — nothing to learn.
                    if (System.Convert.ToDouble(data[5]) != 0 && System.Convert.ToDouble(data[6]) != 0)
                    {
                        inputs.Clear();
                        outputs.Clear();
                        inputs.Add(System.Convert.ToDouble(data[0]));
                        inputs.Add(System.Convert.ToDouble(data[1]));
                        inputs.Add(System.Convert.ToDouble(data[2]));
                        inputs.Add(System.Convert.ToDouble(data[3]));
                        inputs.Add(System.Convert.ToDouble(data[4]));
                        // Map recorded inputs from [0,1] into the network's [-1,1] range.
                        double o1 = Map(0, 1, -1, 1, System.Convert.ToSingle(data[5]));
                        outputs.Add(o1);
                        double o2 = Map(0, 1, -1, 1, System.Convert.ToSingle(data[6]));
                        outputs.Add(o2);
                        calcOutputs = ann.Train(inputs, outputs);
                        // Mean of the two squared output errors for this row.
                        thisError = (Mathf.Pow((float)(outputs[0] - calcOutputs[0]), 2) +
                                     Mathf.Pow((float)(outputs[1] - calcOutputs[1]), 2)) / 2.0f;
                    }
                    sse += thisError;
                }
                trainingProgress = (float)i / (float)epochs;
                sse /= lineCount;
                // If this epoch made things worse, roll the weights back and
                // shrink the learning rate; otherwise grow it and keep the SSE.
                if (lastSSE < sse)
                {
                    ann.LoadWeights(currentWeights);
                    ann.alpha = Mathf.Clamp((float)ann.alpha - 0.001f, 0.01f, 0.9f);
                }
                else
                {
                    ann.alpha = Mathf.Clamp((float)ann.alpha + 0.001f, 0.01f, 0.9f);
                    lastSSE = sse;
                }
                yield return null;
            }
        }
    }
    trainingDone = true;
}
// Q-learning after the agent dies: iterate the replay memories backwards so
// the death penalty propagates up the list, retrain the network on the
// corrected Q-values, then reset the episode counters.
private void TrainAfterDead()
{
    for (int i = replayMemory.Count - 1; i >= 0; i--)
    {
        // Q-values the network currently predicts for this memory's state.
        // (The original allocated throwaway lists here that were immediately
        // overwritten; removed.)
        List<double> toutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));
        double maxQOld = toutputsOld.Max();
        // Best action under the current policy. toutputsOld is already a
        // List<double>, so the original's ToList() copy was unnecessary.
        int action = toutputsOld.IndexOf(maxQOld);

        double feedback;
        if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
        {
            // Last memory, or a death reward (-1): everything after this point
            // is a different episode, so the raw reward is the target.
            feedback = replayMemory[i].reward;
        }
        else
        {
            // Q-values of the NEXT memory feed the Bellman equation:
            // reward + discount * maxQ.
            List<double> toutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
            double maxQ = toutputsNew.Max();
            feedback = replayMemory[i].reward + discount * maxQ;
        }

        // Overwrite the chosen action's Q-value with the feedback and use the
        // corrected vector as the desired output for training.
        toutputsOld[action] = feedback;
        ann.Train(replayMemory[i].states, toutputsOld);
    }
    Done();
    replayMemory.Clear();
    failCount++;
    episode++;
}
// Run the ANN: trains on the sample when the flag is set, otherwise only
// computes the network's output for the given state.
List <double> Run(double bx, double by, double bvx, double bvy, double px, double py, double pv, bool train)
{
    // Ball position/velocity and paddle position form the input vector.
    List<double> inputs = new List<double> { bx, by, bvx, bvy, px, py };
    // Expected paddle velocity is the single training target.
    List<double> outputs = new List<double> { pv };

    if (train)
    {
        return ann.Train(inputs, outputs);
    }
    return ann.CalcOutput(inputs, outputs);
}
// Trains the network on a single (i1, i2) -> o sample and returns the
// network's calculated outputs for it.
List <double> Train(double i1, double i2, double o)
{
    List<double> sampleInputs = new List<double> { i1, i2 };
    List<double> sampleTargets = new List<double> { o };
    return ann.Train(sampleInputs, sampleTargets);
}
// Feeds one two-input training sample with desired output `o` to the network
// and returns what the network computed for it.
private List <double> Train(double i1, double i2, double o)
{
    var inputs = new List<double> { i1, i2 };
    var desiredOutputs = new List<double> { o };
    return ann.Train(inputs, desiredOutputs);
}
// One physics step of the agent: read the state, apply the network's output
// as upward force, store the step in replay memory, and on death replay the
// memories backwards to train the network (Q-learning).
private void FixedUpdate()
{
    // Cache the rigidbody instead of calling GetComponent three times per step.
    Rigidbody2D rb = this.GetComponent<Rigidbody2D>();

    List<double> states = new List<double>();
    states.Add(this.transform.position.y);
    states.Add(rb.velocity.y);

    List<double> qs = ann.CalcOutput(states);
    rb.AddForce(Vector2.up * force * (float)qs[0]);

    // Negative reward on death, small positive reward for surviving the step.
    if (dead)
    {
        reward = -1;
    }
    else
    {
        reward = 0.1f;
    }

    replay lastmemory = new replay(this.transform.position.y, rb.velocity.y, reward);
    // Bounded replay memory: drop the oldest entry once over capacity.
    if (replaymemory.Count > mcapacity)
    {
        replaymemory.RemoveAt(0);
    }
    replaymemory.Add(lastmemory);

    // Training and Q-learning: only runs once the agent has died. Iterate
    // backwards so the death penalty propagates to earlier memories.
    if (dead)
    {
        for (int i = replaymemory.Count - 1; i >= 0; i--)
        {
            List<double> toutputs_old = ann.CalcOutput(replaymemory[i].states);
            double feedback;
            if (i == replaymemory.Count - 1 || replaymemory[i].reward == -1)
            {
                // Terminal memory: the raw reward is the training target.
                feedback = replaymemory[i].reward;
            }
            else
            {
                List<double> toutputs_new = ann.CalcOutput(replaymemory[i + 1].states);
                double maxQ = toutputs_new[0];
                feedback = replaymemory[i].reward + discount * maxQ; // BELLMAN EQUATION
            }
            toutputs_old[0] = feedback;
            ann.Train(replaymemory[i].states, toutputs_old);
        }
        dead = false;
        Reset();
        replaymemory.Clear();
    }
}
// Wrapper method for train and calculate output.
List <double> Run(double bx, double by, double bvx, double bvy, double px, double py, double pv, bool train)
{
    List<double> inputs = new List<double>();
    inputs.Add(bx);
    inputs.Add(by);
    inputs.Add(bvx);
    inputs.Add(bvy);
    inputs.Add(px);
    inputs.Add(py);

    List<double> outputs = new List<double>();
    outputs.Add(pv);

    if (train)
    {
        return ann.Train(inputs, outputs);
    }
    else
    {
        return ann.CalcOutput(inputs, outputs);
    }
}
// Trains the network on one six-input sample with a single desired output
// and returns the network's calculated outputs.
List <double> Train(double i0, double i1, double i2, double i3, double i4, double i5, double output)
{
    List<double> inputs = new List<double> { i0, i1, i2, i3, i4, i5 };
    List<double> outputs = new List<double> { output };
    return ann.Train(inputs, outputs);
}
// Coroutine: trains the network from the car-racing training data file for
// `epochs` passes, then saves the resulting weights.
private IEnumerator LoadTrainingSet()
{
    string path = Application.dataPath + "/CarRacing/trainingData.txt";
    string line;
    if (File.Exists(path))
    {
        int lineCount = File.ReadAllLines(path).Length;
        // Dispose the reader when training completes (the original leaked it).
        using (StreamReader streamReader = File.OpenText(path))
        {
            List<double> inputs = new List<double>();
            List<double> outputs = new List<double>();
            for (int i = 0; i < epochs; i++)
            {
                sse = 0;
                // Rewind for the next epoch. DiscardBufferedData is required:
                // moving BaseStream.Position alone leaves stale bytes in the
                // StreamReader's internal buffer, so epochs after the first
                // would read corrupted lines.
                streamReader.BaseStream.Position = 0;
                streamReader.DiscardBufferedData();
                string currentWeights = ann.PrintWeights();
                while ((line = streamReader.ReadLine()) != null)
                {
                    string[] data = line.Split(',');
                    // If nothing is to be learned, ignore this line.
                    float thisError = 0;
                    if (System.Convert.ToDouble(data[5]) != 0 && System.Convert.ToDouble(data[6]) != 0)
                    {
                        inputs.Clear();
                        outputs.Clear();
                        FillInputs(inputs, data);
                        FillOutputs(outputs, data);
                        List<double> calcOutputs = ann.Train(inputs, outputs);
                        // Mean of the two squared output errors for this row.
                        thisError = (Mathf.Pow((float)(outputs[0] - calcOutputs[0]), 2) +
                                     Mathf.Pow((float)(outputs[1] - calcOutputs[1]), 2)) / 2.0f;
                    }
                    sse += thisError;
                }
                trainingProgress = (float)i / (float)epochs;
                sse /= lineCount;
                CorrectTraining(currentWeights);
                yield return null;
            }
        }
    }
    finishedTraining = true;
    SaveWeightsToFile();
}
// Runs one (input1, input2) -> desiredOutput sample through the network.
// When updateWeights is true the network trains on it; otherwise the output
// is only calculated without touching the weights.
List <double> Train(int input1, int input2, int desiredOutput, bool updateWeights = true)
{
    var inputs = new List<double> { input1, input2 };
    var outputs = new List<double> { desiredOutput };
    return updateWeights
        ? ann.Train(inputs, outputs)
        : ann.CalcOutput(inputs, outputs);
}
// Supervised training step for the paddle: raycast along the ball's velocity
// to predict where it will arrive, then train the network to output the
// paddle's required y-delta to reach that point.
private void Learn()
{
    // NOTE(review): the original computed a layerMask (1 << 8) but never
    // passed it to the raycast; removed as dead code — confirm whether the
    // ray was actually meant to filter on layer 8.
    RaycastHit2D hit = Physics2D.Raycast(ball.transform.position, ballRb.velocity, 1000);
    GameState gameState = new GameState(ball.position, ballRb.velocity, paddle.position, hit.point);
    // Desired movement is from the current position to where the ball will be.
    float desiredDelta = hit.point.y - paddle.position.y;
    List<double> inputs = gameState.GetInputs();
    List<double> output = ann.Train(inputs, new List<double> { desiredDelta });
    yvel = (float)output[0] * Time.deltaTime * paddleSpeed;
    print("Y vel = " + yvel);
}
// Runs the network on the current pong state; trains when requested,
// otherwise only evaluates without changing the weights.
List <double> Run(double bx, double by, double bvx, double bvy, double px, double py, double pv, bool train)
{
    List<double> inputs = new List<double>
    {
        bx,  // ball x pos
        by,  // ball y pos
        bvx, // ball velocity x
        bvy, // ball velocity y
        px,  // paddle x pos
        py   // paddle y pos
    };
    List<double> outputs = new List<double> { pv }; // paddle y velocity
    return train ? ann.Train(inputs, outputs) : ann.CalcOutput(inputs, outputs);
}
// Trains or evaluates the network on the ball/paddle state; pvy is the
// desired paddle y-velocity used as the training target.
public List <double> Run(double bxp, double byp, double pxp, double pyp, double bvx, double bvy, double pvy, bool train)
{
    var inputs = new List<double> { bxp, byp, pxp, pyp, bvx, bvy };
    var outputs = new List<double> { pvy };

    if (train)
    {
        return ann.Train(inputs, outputs);
    }
    return ann.CalcOutput(inputs, outputs);
}
// Feeds the ball/paddle state through the network, training on paddleVel
// when asked, otherwise only calculating the output.
List <double> Run(double ballX, double ballY, double ballVelX, double ballVelY, double paddleX, double paddleY, double paddleVel, bool train)
{
    List<double> networkInputs = new List<double> { ballX, ballY, ballVelX, ballVelY, paddleX, paddleY };
    List<double> desiredOutputs = new List<double> { paddleVel };
    return train
        ? ann.Train(networkInputs, desiredOutputs)
        : ann.CalcOutput(networkInputs, desiredOutputs);
}
// Runs the neural network over the current game state; when `train` is true
// the paddle velocity is used as the training target, otherwise it is ignored.
List <double> Run(double ballX, double ballY, double ballVelocX, double ballVelocY, double paddleX, double paddleY, double paddleVelocity, bool train)
{
    var inputs = new List<double> { ballX, ballY, ballVelocX, ballVelocY, paddleX, paddleY };
    var outputs = new List<double> { paddleVelocity };

    if (train)
    {
        return _artificialNeuronNetwork.Train(inputs, outputs);
    }
    return _artificialNeuronNetwork.CalcOutput(inputs, outputs);
}
// Either trains the network on the current sample or calculates its output
// without affecting the weights (pv is ignored in that case).
List <double> Run(double bx, double by, double bvx, double bvy, double px, double py, double pv, bool train)
{
    // Six inputs: ball position, ball velocity, paddle position.
    List<double> inputs = new List<double> { bx, by, bvx, bvy, px, py };
    // One output: desired paddle velocity (the training target).
    List<double> outputs = new List<double> { pv };
    return train ? ann.Train(inputs, outputs) : ann.CalcOutput(inputs, outputs);
}
// One physics step of the ball-balancing agent: read the state, pick a tilt
// action from the network's softmaxed Q-values, record the step in replay
// memory, and when the ball drops replay the memories backwards to train the
// network (Q-learning with the Bellman equation).
private void FixedUpdate()
{
    timer += Time.deltaTime;

    // State: platform tilt, ball z position, ball angular velocity.
    List <double> states = new List <double>();
    List <double> qs = new List <double>();
    states.Add(this.transform.rotation.x);
    states.Add(ball.transform.position.z);
    states.Add(ball.GetComponent <Rigidbody>().angularVelocity.x);

    qs = SoftMax(ann.CalcOutput(states));
    double maxQ = qs.Max();
    int maxQindex = qs.ToList().IndexOf(maxQ); // index of the best-scoring action

    // Decay the exploration rate every step. (NOTE(review): `maxeplorerate`
    // is the existing field name; the typo is preserved here because renaming
    // it would touch code outside this method.)
    explorerate = Mathf.Clamp(explorerate - exploredecay, minexplorerate, maxeplorerate);

    /* NO NEED OF EXPLORING IN THIS CASE AS ENVIRONMENT IS REALLY VERY SMALL
     * if(Random.Range(0,100)<explorerate)
     * {
     *     maxQindex = Random.Range(0, 2);
     * }*/

    // Action 0 tilts right, action 1 tilts left; magnitude scales with the Q-value.
    if (maxQindex == 0)
    {
        this.transform.Rotate(Vector3.right, tiltspeed * (float)qs[maxQindex]);
    }
    else if (maxQindex == 1)
    {
        this.transform.Rotate(Vector3.right, -tiltspeed * (float)qs[maxQindex]);
    }

    // Negative reward when the ball drops, small positive reward for surviving.
    if (ball.GetComponent <BallState>().dropped)
    {
        reward = -1;
    }
    else
    {
        reward = 0.1f;
    }

    Replay lastmemory = new Replay(this.transform.rotation.x,
                                   ball.transform.position.z,
                                   ball.GetComponent <Rigidbody>().angularVelocity.x,
                                   reward);
    // Bounded replay memory: drop the oldest entry once over capacity.
    if (replaymemory.Count > mcapacity)
    {
        replaymemory.RemoveAt(0);
    }
    replaymemory.Add(lastmemory);

    // Training and Q-LEARNING: runs once the ball has been dropped. Iterate
    // backwards so the terminal reward propagates to earlier memories.
    if (ball.GetComponent <BallState>().dropped)
    {
        for (int i = replaymemory.Count - 1; i >= 0; i--)
        {
            List <double> toutputs_old = new List <double>();
            List <double> toutputs_new = new List <double>();
            // Q-values the network currently predicts for this memory.
            toutputs_old = SoftMax(ann.CalcOutput(replaymemory[i].states));
            double maxQ_old = toutputs_old.Max();
            int action = toutputs_old.ToList().IndexOf(maxQ_old);
            double feedback;
            // Terminal memory (last entry, or the drop reward): the raw
            // reward is the training target.
            if (i == replaymemory.Count - 1 || replaymemory[i].reward == -1)
            {
                feedback = replaymemory[i].reward;
            }
            else
            {
                toutputs_new = SoftMax(ann.CalcOutput(replaymemory[i + 1].states));
                maxQ = toutputs_new.ToList().Max();
                feedback = (replaymemory[i].reward + discount * maxQ); // BELLMAN EQUATION
            }
            // Replace the chosen action's Q-value with the target and retrain.
            toutputs_old[action] = feedback;
            ann.Train(replaymemory[i].states, toutputs_old);
        }
        // Track the best balancing time, then reset the episode.
        if (timer > maxBalanceTime)
        {
            maxBalanceTime = timer;
        }
        timer = 0;
        ball.GetComponent <BallState>().dropped = false;
        this.transform.rotation = Quaternion.identity;
        Reset();
        replaymemory.Clear();
        fallcount++;
    }
}
// One physics step: pick a tilt action from the network's Q-values
// (epsilon-greedy), record the step in replay memory, and when the ball drops
// train the network over the memories (Q-learning with the Bellman equation).
void FixedUpdate()
{
    timer += Time.deltaTime;

    List<double> states = new List<double>();
    List<double> qs = new List<double>();
    states.Add(this.transform.rotation.x);
    // BUGFIX: the original fed this.transform.position.z here, but the replay
    // memories (which the network is trained on, below) store the BALL's z
    // position — the network acted on a different state than it learned from.
    states.Add(ball.transform.position.z);
    states.Add(ball.GetComponent<Rigidbody>().angularVelocity.x);

    qs = SoftMax(ann.CalcOutput(states));
    double maxQ = qs.Max();                   // O(n) over the action list
    int maxQIndex = qs.ToList().IndexOf(maxQ);

    // Epsilon-greedy exploration: more exploration early on, decaying each step.
    exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);
    if (Random.Range(0, 100) < exploreRate)
    {
        maxQIndex = Random.Range(0, 2); // choose either action 0 or 1 at random
    }

    // Action 0 tilts right, action 1 tilts left; magnitude scales with the Q-value.
    if (maxQIndex == 0)
    {
        this.transform.Rotate(Vector3.right, tiltSpeed * (float)qs[maxQIndex]);
    }
    else if (maxQIndex == 1)
    {
        this.transform.Rotate(Vector3.right, -tiltSpeed * (float)qs[maxQIndex]);
    }

    // Negative reward when the ball drops, small positive reward otherwise.
    if (ball.GetComponent<BallState>().dropped)
    {
        reward = -1.0f;
    }
    else
    {
        reward = 0.1f;
    }

    Replay lastMemory = new Replay(this.transform.rotation.x,
                                   ball.transform.position.z,
                                   ball.GetComponent<Rigidbody>().angularVelocity.x,
                                   reward);
    // Bounded replay memory: drop the oldest entry once over capacity.
    if (replayMemory.Count > mCapacity)
    {
        replayMemory.RemoveAt(0);
    }
    replayMemory.Add(lastMemory);

    // Q-learning: replay the memories backwards once the ball has dropped so
    // the terminal reward propagates to earlier states.
    if (ball.GetComponent<BallState>().dropped)
    {
        for (int i = replayMemory.Count - 1; i >= 0; i--)
        {
            List<double> toutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));
            double maxQOld = toutputsOld.Max();
            int action = toutputsOld.ToList().IndexOf(maxQOld);
            double feedback;
            if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
            {
                // Terminal memory: the raw reward is the target.
                feedback = replayMemory[i].reward;
            }
            else
            {
                List<double> toutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                maxQ = toutputsNew.Max();
                feedback = replayMemory[i].reward + discount * maxQ; // Bellman equation
            }
            toutputsOld[action] = feedback;
            ann.Train(replayMemory[i].states, toutputsOld);
        }
        // Track the best balancing time, then reset the episode.
        if (timer > maxBalanceTime)
        {
            maxBalanceTime = timer;
        }
        timer = 0;
        ball.GetComponent<BallState>().dropped = false;
        this.transform.rotation = Quaternion.identity;
        ResetBall();
        replayMemory.Clear();
        failCount++;
    }
}
// One physics step of the runner: look ahead for killboxes, build the state
// from the directions to the next platform and next gap, pick jump/stay from
// the network's Q-values (epsilon-greedy), and on an obstacle hit train the
// network over the replay memories (Q-learning).
void FixedUpdate()
{
    frames++;
    Debug.DrawRay(theEyes.transform.position, theEyes.transform.right * 20, Color.green);
    RaycastHit2D hit = Physics2D.Raycast(theEyes.transform.position, theEyes.transform.right * 20);
    if (hit && hit.collider.tag == "Killbox")
    {
        seeGround = false;
        Debug.DrawRay(theEyes.transform.position, theEyes.transform.right * 20, Color.red);
    }

    timer += Time.deltaTime;
    List<double> states = new List<double>();
    List<double> qs = new List<double>();
    GameObject[] platforms = GameObject.FindGameObjectsWithTag("platform");
    Vector3 bestPoint = GetClosestEnemy(platforms);
    Vector3 closestPoint = GetClosestGap(platforms);
    Vector3 directionToNextPlatform = bestPoint - transform.position;
    Vector3 directionToNextGap = closestPoint - transform.position;
    // State: horizontal offsets to the next platform and the next gap.
    states.Add(directionToNextPlatform.x);
    states.Add(directionToNextGap.x);

    qs = SoftMax(ann.CalcOutput(states));
    double maxQ = qs.Max();
    int maxQIndex = qs.ToList().IndexOf(maxQ);

    // Epsilon-greedy exploration with a decaying rate.
    exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);
    if (Random.Range(0, 100) < exploreRate)
    {
        maxQIndex = Random.Range(0, 2);
    }

    // Vote over 8 frames: action 1 = jump, action 0 = stay.
    if (maxQIndex == 1)
    {
        sumOfJumps++;
    }
    if (maxQIndex == 0)
    {
        sumOfStays++;
    }
    if (frames % 8 == 0)
    {
        if (sumOfJumps > sumOfStays)
        {
            robotAccess.RobotJump();
        }
        sumOfStays = 0;
        sumOfJumps = 0;
        frames = 0;
    }
    // Safety net: if the robot got stuck against something, force a jump.
    if (rb.velocity.x < 0.5)
    {
        robotAccess.RobotJump();
    }

    // Large negative reward on collision, small positive reward for surviving.
    if (hitObstacle)
    {
        reward = -5.0f;
    }
    else
    {
        reward = 0.1f;
    }

    Replay lastMemory = new Replay(directionToNextPlatform.x, directionToNextGap.x, reward);
    // Bounded replay memory: drop the oldest entry once over capacity.
    if (replayMemory.Count > mCapacity)
    {
        replayMemory.RemoveAt(0);
    }
    replayMemory.Add(lastMemory);

    // Q-learning: replay memories backwards once an obstacle is hit.
    if (hitObstacle)
    {
        for (int i = replayMemory.Count - 1; i >= 0; i--)
        {
            List<double> tOutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));
            double maxQOld = tOutputsOld.Max();
            int action = tOutputsOld.ToList().IndexOf(maxQOld);
            double feedback;
            // BUGFIX: the failure reward above is -5.0f, but the original
            // compared against -1, so mid-list terminal memories were never
            // detected. Treat any negative reward as terminal.
            if (i == replayMemory.Count - 1 || replayMemory[i].reward < 0)
            {
                feedback = replayMemory[i].reward;
            }
            else
            {
                List<double> tOutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                maxQ = tOutputsNew.Max();
                feedback = replayMemory[i].reward + discount * maxQ; // Bellman equation
            }
            tOutputsOld[action] = feedback;
            ann.Train(replayMemory[i].states, tOutputsOld);
        }
        // Track the best run time, then reset the episode.
        if (timer > maxBalanceTime)
        {
            maxBalanceTime = timer;
        }
        timer = 0;
        hitObstacle = false;
        theGameManager.Reset();
        replayMemory.Clear();
        failCount++;
    }
}
/// <summary>
/// Loads the training dataset and trains the network for `epochs` passes,
/// adapting the learning rate after each epoch based on the averaged error.
/// </summary>
/// <returns> null (Coroutine). </returns>
private IEnumerator LoadTrainingSet()
{
    string dataSetFilePath = Application.dataPath + dataSetFolder + dataSetFileName;
    // An instance of training (row in the dataSet).
    string instance;
    if (File.Exists(dataSetFilePath))
    {
        Debug.Log("Using training data file found at: " + dataSetFilePath);
        int instanceCount = File.ReadAllLines(dataSetFilePath).Length;
        // Dispose the reader when training finishes (the original leaked it).
        using (StreamReader dataSetFile = File.OpenText(dataSetFilePath))
        {
            var calculatedOutputs = new List <double>();
            var inputs = new List <double>();
            var desiredOutputs = new List <double>();
            for (var i = 0; i < epochs; i++)
            {
                sumSquaredError = 0;
                // Set file pointer to beginning of file for the next epoch.
                // DiscardBufferedData is required: moving BaseStream.Position
                // alone leaves stale bytes in the StreamReader's buffer, so
                // epochs after the first would read corrupted lines.
                dataSetFile.BaseStream.Position = 0;
                dataSetFile.DiscardBufferedData();
                string currentWeights = ann.PrintWeights();
                // Read one instance (line) at a time until the end of the dataSet.
                while ((instance = dataSetFile.ReadLine()) != null)
                {
                    // Separate each feature (column) of the current instance (row).
                    string[] features = instance.Split(',');
                    // The error we get from a particular instance.
                    // If nothing is to be learned, it stays zero.
                    float thisError = 0;
                    // Ignore instances where no user input was recorded —
                    // they provide no useful information.
                    // TODO: Fix floating point number comparison!?
                    if (System.Convert.ToDouble(features[5]) != 0 && System.Convert.ToDouble(features[6]) != 0)
                    {
                        inputs.Clear();
                        desiredOutputs.Clear();
                        // Assign the first five features (raycast distances) to inputs.
                        for (int j = 0; j < 5; j++)
                        {
                            inputs.Add(System.Convert.ToDouble(features[j]));
                        }
                        // Assign the remaining two features (user input) to outputs,
                        // mapped from [0,1] to the network's [-1,1] range.
                        for (int j = 5; j < 7; j++)
                        {
                            double output = Helpers.Map(0, 1, -1, 1, System.Convert.ToSingle(features[j]));
                            desiredOutputs.Add(output);
                        }
                        // Train the Neural Network on this instance.
                        calculatedOutputs = ann.Train(inputs, desiredOutputs);
                        // Calculate individual squared errors.
                        float output0ErrorSquared = Mathf.Pow((float)(desiredOutputs[0] - calculatedOutputs[0]), 2);
                        float output1ErrorSquared = Mathf.Pow((float)(desiredOutputs[1] - calculatedOutputs[1]), 2);
                        // Averaged sum of squared errors for this instance.
                        thisError = (output0ErrorSquared + output1ErrorSquared) / 2f;
                    }
                    sumSquaredError += thisError;
                }
                // Percentage value.
                trainingProgress = (float)i / (float)epochs;
                // Calculate average sumOfSquaredErrors.
                sumSquaredError /= instanceCount;
                AdaptLearning(currentWeights);
                yield return null;
            }
        }
    }
    else
    {
        Debug.LogError("No training data file found at: " + dataSetFilePath);
    }
    trainingDone = true;
    if (!loadWeightsFromFile)
    {
        SaveWeightsToFile();
    }
}
// One physics step of the self-driving car: cast five "whisker" rays, feed the
// distances to the network, steer toward the best Q-value action, record the
// step in replay memory, and on failure (ball dropped) train the network over
// the memories (Q-learning). Reaching the goal ("meta") saves the weights.
void FixedUpdate()
{
    timer += Time.deltaTime;
    List <double> states = new List <double>();
    List <double> qs = new List <double>();

    RaycastHit hit;
    // Five whisker distances, defaulting to visibleDistance when nothing is hit.
    float fDist = visibleDistance, rDist = visibleDistance, lDist = visibleDistance, r45Dist = visibleDistance, l45Dist = visibleDistance;
    if (Physics.Raycast(transform.position, this.transform.forward, out hit, visibleDistance, terrainLayer))
    {
        fDist = Vector3.Distance(transform.position, hit.point);
    }
    if (Physics.Raycast(transform.position, this.transform.right, out hit, visibleDistance, terrainLayer))
    {
        rDist = Vector3.Distance(transform.position, hit.point);
    }
    if (Physics.Raycast(transform.position, -this.transform.right, out hit, visibleDistance, terrainLayer))
    {
        lDist = Vector3.Distance(transform.position, hit.point);
    }
    if (Physics.Raycast(transform.position, Quaternion.AngleAxis(-45, Vector3.up) * this.transform.right, out hit, visibleDistance, terrainLayer))
    {
        r45Dist = Vector3.Distance(transform.position, hit.point);
    }
    if (Physics.Raycast(transform.position, Quaternion.AngleAxis(45, Vector3.up) * -this.transform.right, out hit, visibleDistance, terrainLayer))
    {
        // NOTE(review): this ray uses hit.distance while the others use
        // Vector3.Distance — confirm whether the difference is intentional.
        l45Dist = hit.distance;
    }

    // State: the five whisker distances.
    states.Add(fDist);
    states.Add(rDist);
    states.Add(lDist);
    states.Add(r45Dist);
    states.Add(l45Dist);
    qs = SoftMax(ann.CalcOutput(states));
    double maxQ = qs.Max();
    int maxQIndex = qs.ToList().IndexOf(maxQ);

    // Exploration is disabled for this agent.
    //exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);
    //if(Random.Range(0,100) < exploreRate)
    //    maxQIndex = Random.Range(0,2);

    // Constant forward motion; action 0 steers right, action 1 steers left.
    float translation = speed * Time.deltaTime;
    this.transform.Translate(0, 0, translation);
    if (maxQIndex == 0)
    {
        this.transform.Rotate(Vector3.up, tiltSpeed * (float)qs[maxQIndex]);
    }
    else if (maxQIndex == 1)
    {
        this.transform.Rotate(Vector3.up, -tiltSpeed * (float)qs[maxQIndex]);
    }

    // Reward: -1 on failure, +0.5 on a checkpoint ("point"), +0.1 otherwise.
    if (ball.GetComponent <BallState>().dropped)
    {
        reward = -1.0f; //reward = 0;
    }
    else if (ball.GetComponent <BallState>().point)
    {
        reward = 0.5f;
    }
    else
    {
        reward = 0.1f;// + 0.01f;
    }

    Replay lastMemory = new Replay(fDist, rDist, lDist, r45Dist, l45Dist, reward);
    // Bounded replay memory: drop the oldest entry once over capacity.
    if (replayMemory.Count > mCapacity)
    {
        replayMemory.RemoveAt(0);
    }
    replayMemory.Add(lastMemory);

    // Q-learning: replay the memories backwards once the agent has failed.
    if (ball.GetComponent <BallState>().dropped)
    {
        ResetBall(); // so the agent does not get stuck when no weights file exists
        for (int i = replayMemory.Count - 1; i >= 0; i--)
        {
            List <double> toutputsOld = new List <double>();
            List <double> toutputsNew = new List <double>();
            // Q-values the network currently predicts for this memory.
            toutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));
            double maxQOld = toutputsOld.Max();
            int action = toutputsOld.ToList().IndexOf(maxQOld);
            double feedback;
            // Terminal memory (last entry, or the failure reward): the raw
            // reward is the training target.
            if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
            {
                feedback = replayMemory[i].reward;
            }
            else
            {
                toutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                maxQ = toutputsNew.Max();
                feedback = (replayMemory[i].reward + discount * maxQ); // Bellman equation
            }
            toutputsOld[action] = feedback;
            ann.Train(replayMemory[i].states, toutputsOld);
        }
        timer = 0;
        ball.GetComponent <BallState>().dropped = false;
        this.transform.rotation = Quaternion.identity;
        ResetBall();
        replayMemory.Clear();
        failCount++;
        // On a random circuit, losing 1000 times triggers the LOSE flow.
        if (_isAleatoryCircuit)
        {
            if (failCount == 1000)
            {
                flowchart.ExecuteBlock("LOSE");
            }
            //Debug.Log( "Fails: " + failCount);
            onFail?.Invoke(failCount);
        }
        //reward = 0;
    }

    // Goal reached ("meta"): save the trained weights for this circuit.
    if (ball.GetComponent <BallState>().meta)
    {
        ball.GetComponent <BallState>().meta = false;
        //string pesos = PlayerPrefs.GetString("Weights");
        string pesos = ann.PrintWeights(); // "pesos" = weights
        if (_isAleatoryCircuit)
        {
            // First win on a random circuit (within the fail limit) triggers the WIN flow.
            if (!WIN && failCount <= 1000)
            {
                if (flowchart != null)
                {
                    flowchart.ExecuteBlock("WIN");
                }
                WIN = true;
            }
            /*List<string> saveFileContent = new List<string>();
             * saveFileContent.Add(currentAleatoryCircuitName);
             * saveFileContent.Add(pesos);
             * SaveAndLoad.Save(saveFileContent, currentAleatoryCircuitName + ".txt");*/
            manager.SaveNewDataDictionary(currentAleatoryCircuitName, pesos);
            Debug.Log(currentAleatoryCircuitName);
        }
        else
        {
            managerCircuits.SaveNewDataDictionary(circuitName, pesos);
        }
        /*
         * if (!_isAleatoryCircuit && maxBalanceTime <= 0)
         * {
         *     pesos = ann.PrintWeights();
         *     SaveAndLoad.Save(pesos, CIRCUITO1);
         * }
         * else if(!_isAleatoryCircuit && maxBalanceTime > timer)
         * {
         *     pesos = ann.PrintWeights();
         *     SaveAndLoad.Save(pesos, CIRCUITO1);
         * }*/
        maxBalanceTime = timer;
        Debug.Log(maxBalanceTime);
        timer = 0;
    }
}
// One physics step of the ball-balancing agent: choose a tilt action from the
// network's Q-values (epsilon-greedy), record the step, and when the ball
// drops train the network over the replay memories (Q-learning).
private void FixedUpdate()
{
    timer += Time.deltaTime;

    List<double> states = new List<double>();
    List<double> qs = new List<double>();
    states.Add(this.transform.rotation.x);
    states.Add(ball.transform.position.z);
    states.Add(ball.GetComponent<Rigidbody>().angularVelocity.x);

    qs = SoftMax(ann.CalcOutput(states));
    double maxQ = qs.Max();
    int maxQIndex = qs.ToList().IndexOf(maxQ);

    // Epsilon-greedy exploration with a decaying rate.
    exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);
    if (Random.Range(0, 10000) < exploreRate)
    {
        maxQIndex = Random.Range(0, 2);
    }

    // Action 0 tilts right, action 1 tilts left; magnitude scales with the Q-value.
    if (maxQIndex == 0)
    {
        this.transform.Rotate(Vector3.right, tiltSpeed * (float)qs[maxQIndex]);
    }
    else if (maxQIndex == 1)
    {
        this.transform.Rotate(Vector3.right, -tiltSpeed * (float)qs[maxQIndex]);
    }

    // Large negative reward on a drop, small positive reward for surviving.
    if (ball.GetComponent<BallState>().dropped)
    {
        reward = -5.0f;
    }
    else
    {
        reward = 0.1f;
    }

    Replay lastMemory = new Replay(this.transform.rotation.x,
                                   ball.transform.position.z,
                                   ball.GetComponent<Rigidbody>().angularVelocity.x,
                                   reward);
    // Bounded replay memory: drop the oldest entry once over capacity.
    if (replayMemory.Count > mCapacity)
    {
        replayMemory.RemoveAt(0);
    }
    replayMemory.Add(lastMemory);

    // Q-learning: replay memories backwards once the ball has dropped.
    if (ball.GetComponent<BallState>().dropped)
    {
        for (int i = replayMemory.Count - 1; i >= 0; i--)
        {
            List<double> toutputOld = SoftMax(ann.CalcOutput(replayMemory[i].states));
            double maxQOld = toutputOld.Max();
            int action = toutputOld.ToList().IndexOf(maxQOld);
            double feedback;
            // BUGFIX: the failure reward is -5.0f here, but the original
            // compared against -1, so mid-list terminal memories were never
            // detected. Treat any negative reward as terminal.
            if (i == replayMemory.Count - 1 || replayMemory[i].reward < 0)
            {
                feedback = replayMemory[i].reward;
            }
            else
            {
                List<double> toutputNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                maxQ = toutputNew.Max();
                // BUGFIX: the Bellman equation is reward + discount * maxQ;
                // the original MULTIPLIED the reward (reward * discount * maxQ),
                // which is not a valid Q-value target (every sibling
                // implementation in this file uses addition).
                feedback = replayMemory[i].reward + discount * maxQ;
            }
            toutputOld[action] = feedback;
            ann.Train(replayMemory[i].states, toutputOld);
        }
        // Track the best balancing time, then reset the episode.
        if (timer > maxBalanceTime)
        {
            maxBalanceTime = timer;
        }
        timer = 0;
        ball.GetComponent<BallState>().dropped = false;
        this.transform.rotation = Quaternion.identity;
        ResetBall();
        replayMemory.Clear();
        failCount++;
    }
}
// Q-learning control step for the two-axis balance platform: read the current
// state, pick one of four tilt actions via the ANN (with random exploration),
// apply it, record the transition, and replay the episode for training once
// the ball is dropped.
private void FixedUpdate()
{
    timer += Time.deltaTime;

    // State: platform tilt on both axes, ball position, ball spin on both axes.
    // BUGFIX: the original fed 'this.transform.position.z' here while the
    // replay memory below stores 'ball.transform.position.z' in the same slot,
    // so the network was queried and trained on different inputs. Use the
    // ball's position for both (matching the stored Replay).
    List <double> states = new List <double>();
    states.Add(this.transform.rotation.x);
    states.Add(this.transform.rotation.z);
    states.Add(ball.transform.position.z);
    states.Add(ball.GetComponent <Rigidbody>().angularVelocity.x);
    states.Add(ball.GetComponent <Rigidbody>().angularVelocity.z);

    // Softmax-normalized Q values, one per action.
    List <double> qs = ANN.SoftMax(ann.CalculateOutput(states));
    double maxQ = qs.Max();
    int maxQIndex = qs.ToList().IndexOf(maxQ);

    // Decay exploration each step, clamped to the configured range.
    exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);

    // Check to see if we choose a random action instead of the greedy one.
    if (UnityEngine.Random.Range(1, 100) < exploreRate)
    {
        maxQIndex = UnityEngine.Random.Range(0, 4);
    }

    // Action 0 tilt right, action 1 tilt left,
    // action 2 tilt forward, action 3 tilt backward;
    // magnitude scaled by the action's Q value.
    if (maxQIndex == 0)
    {
        this.transform.Rotate(Vector3.right, tiltSpeed * (float)qs[maxQIndex]);
    }
    else if (maxQIndex == 1)
    {
        this.transform.Rotate(Vector3.right, -tiltSpeed * (float)qs[maxQIndex]);
    }
    else if (maxQIndex == 2)
    {
        this.transform.Rotate(Vector3.forward, tiltSpeed * (float)qs[maxQIndex]);
    }
    else if (maxQIndex == 3)
    {
        this.transform.Rotate(Vector3.forward, -tiltSpeed * (float)qs[maxQIndex]);
    }

    // Penalize a drop, reward survival slightly.
    if (ball.GetComponent <BallState>().dropped)
    {
        reward = -1f;
    }
    else
    {
        reward = 0.1f;
    }

    // Store this transition; drop the oldest entry when over capacity.
    Replay lastMemory = new Replay(this.transform.rotation.x,
                                   this.transform.rotation.z,
                                   ball.transform.position.z,
                                   ball.GetComponent <Rigidbody>().angularVelocity.x,
                                   ball.GetComponent <Rigidbody>().angularVelocity.z,
                                   reward);
    if (replayMemory.Count > mCapacity)
    {
        replayMemory.RemoveAt(0);
    }
    replayMemory.Add(lastMemory);

    // Q learning starts here. Up to this point all we did is build the inputs,
    // get the result from the ann, reward accordingly and store the memory.
    if (ball.GetComponent <BallState>().dropped)
    {
        // Loop backwards so the quality of the last memory gets carried
        // back up through the list, attributing blame through the episode.
        for (int i = replayMemory.Count - 1; i >= 0; --i)
        {
            // Q values of the current memory.
            List <double> currentMemoryQValues = ANN.SoftMax(ann.CalculateOutput(replayMemory[i].states));
            // Q values of the next memory (filled only for non-terminal steps).
            List <double> nextMemoryQValues = new List <double>();

            // Which action produced the best Q value for this memory.
            double maxQOld = currentMemoryQValues.Max();
            int action = currentMemoryQValues.ToList().IndexOf(maxQOld);

            double feedback;
            // A terminal memory (the last one, or one carrying the -1 drop
            // penalty — everything after a drop is meaningless) keeps its raw
            // reward; otherwise apply the Bellman update with the next state.
            if ((i == replayMemory.Count - 1) || (replayMemory[i].reward == -1f))
            {
                feedback = replayMemory[i].reward;
            }
            else
            {
                nextMemoryQValues = ANN.SoftMax(ann.CalculateOutput(replayMemory[i + 1].states));
                maxQ = nextMemoryQValues.Max();
                feedback = (replayMemory[i].reward + discount * maxQ);
            }

            // Write the corrected Q value for the chosen action and train.
            currentMemoryQValues[action] = feedback;
            ann.Train(replayMemory[i].states, currentMemoryQValues);
        }

        if (timer > maxBalanceTime)
        {
            maxBalanceTime = timer;
        }
        timer = 0;

        ball.GetComponent <BallState>().dropped = false;
        this.transform.rotation = Quaternion.identity;
        ResetBall();
        replayMemory.Clear();
        failCount++;
    }
}
// Q-learning step for the flappy-bird style agent: read the distances to the
// two beams, pick an up/down force via the ANN, record the transition, and
// replay the episode for training when the bird crashes.
void FixedUpdate()
{
    timer += Time.deltaTime;
    List <double> states = new List <double>();
    List <double> qs = new List <double>();

    // State: distance to the top beam and distance to the bottom beam.
    states.Add(Vector3.Distance(this.transform.position, topBeam.transform.position));
    states.Add(Vector3.Distance(this.transform.position, bottomBeam.transform.position));

    // Softmax-normalized Q values, one per action.
    qs = SoftMax(ann.CalcOutput(states));
    double maxQ = qs.Max();
    int maxQIndex = qs.ToList().IndexOf(maxQ);

    // Exploration rate still decays each step, but the random-action branch
    // below is deliberately disabled (left commented out).
    exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);

    //if(Random.Range(0,100) < exploreRate)
    // maxQIndex = Random.Range(0,2);

    // Action 0 pushes up, action 1 pushes down; magnitude scaled by Q value.
    if (maxQIndex == 0)
    {
        rb.AddForce(Vector3.up * moveForce * (float)qs[maxQIndex]);
    }
    else if (maxQIndex == 1)
    {
        rb.AddForce(Vector3.up * -moveForce * (float)qs[maxQIndex]);
    }

    // Penalize a crash, reward survival slightly.
    if (crashed)
    {
        reward = -1.0f;
    }
    else
    {
        reward = 0.1f;
    }

    // Store this transition; drop the oldest entry when over capacity.
    Replay lastMemory = new Replay(Vector3.Distance(this.transform.position, topBeam.transform.position),
                                   Vector3.Distance(this.transform.position, bottomBeam.transform.position),
                                   reward);
    if (replayMemory.Count > mCapacity)
    {
        replayMemory.RemoveAt(0);
    }
    replayMemory.Add(lastMemory);

    // On a crash, train over the whole episode backwards so the terminal
    // reward propagates through the preceding memories, then reset.
    if (crashed)
    {
        for (int i = replayMemory.Count - 1; i >= 0; i--)
        {
            // Q values for the current memory, and (for non-terminal steps)
            // the next memory.
            List <double> toutputsOld = new List <double>();
            List <double> toutputsNew = new List <double>();
            toutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));

            // Which action produced the best Q value for this memory.
            double maxQOld = toutputsOld.Max();
            int action = toutputsOld.ToList().IndexOf(maxQOld);

            double feedback;
            // A terminal memory (last entry, or one carrying the -1 crash
            // penalty) keeps its raw reward; otherwise apply the Bellman
            // update using the next memory's best Q value.
            if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
            {
                feedback = replayMemory[i].reward;
            }
            else
            {
                toutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                maxQ = toutputsNew.Max();
                feedback = (replayMemory[i].reward + discount * maxQ);
            }

            // Write the corrected Q value for the chosen action and train.
            toutputsOld[action] = feedback;
            ann.Train(replayMemory[i].states, toutputsOld);
        }

        // Track the best survival time, then reset the episode.
        if (timer > maxBalanceTime)
        {
            maxBalanceTime = timer;
        }
        timer = 0;
        crashed = false;
        ResetBird();
        replayMemory.Clear();
        failCount++;
    }
}
// Method to perform the ANN training using data collected from the player
// (trainingData.txt). Runs as a coroutine — one yield per epoch — so OnGUI
// can refresh the on-screen progress values. Uses adaptive training: when an
// epoch's average SSE worsens, the previous weights are restored and alpha is
// decreased; otherwise alpha is increased, helping escape local optima.
IEnumerator LoadTrainingSet()
{
    string path = Application.dataPath + "/trainingData.txt";
    string line;
    if (File.Exists(path))
    {
        int lineCount = File.ReadAllLines(path).Length;
        // BUGFIX: wrap the reader in 'using' so the file handle is released
        // when training finishes (the original never disposed it).
        using (StreamReader tdf = File.OpenText(path))
        {
            List <double> calcOutputs = new List <double>();
            List <double> inputs = new List <double>();
            List <double> outputs = new List <double>();

            //Loop through the epochs
            for (int i = 0; i < epochs; i++)
            {
                sse = 0;
                // Rewind the file for this epoch.
                // BUGFIX: discard the reader's internal buffer after seeking,
                // otherwise stale buffered text could be re-read.
                tdf.BaseStream.Position = 0;
                tdf.DiscardBufferedData();

                // Snapshot the current weights (comma-separated string) so we
                // can roll back if this epoch makes the error worse.
                string currentWeights = ann.PrintWeights();

                // Load the training data, line by line.
                while ((line = tdf.ReadLine()) != null)
                {
                    string[] data = line.Split(',');
                    // If there is nothing to be learned, ignore this line.
                    float thisError = 0;

                    // Rows whose labels (translation data[5], rotation data[6])
                    // are zero are left out to reduce the data set.
                    // NOTE(review): '&&' means a row is skipped when EITHER
                    // label is zero — confirm rows with exactly one zero label
                    // really should be discarded.
                    if (System.Convert.ToDouble(data[5]) != 0 && System.Convert.ToDouble(data[6]) != 0)
                    {
                        //Clear out lists from previous row
                        inputs.Clear();
                        outputs.Clear();

                        //Add training input data to the inputs to the ANN
                        inputs.Add(System.Convert.ToDouble(data[0]));
                        inputs.Add(System.Convert.ToDouble(data[1]));
                        inputs.Add(System.Convert.ToDouble(data[2]));
                        inputs.Add(System.Convert.ToDouble(data[3]));
                        inputs.Add(System.Convert.ToDouble(data[4]));

                        //Map labels to range (0,1) for efficient training
                        double o1 = Map(0, 1, -1, 1, System.Convert.ToSingle(data[5]));
                        outputs.Add(o1);
                        double o2 = Map(0, 1, -1, 1, System.Convert.ToSingle(data[6]));
                        outputs.Add(o2);

                        //Calculated output (y-hat)
                        calcOutputs = ann.Train(inputs, outputs);

                        //Sum squared error value across both labels
                        thisError = ((Mathf.Pow((float)(outputs[0] - calcOutputs[0]), 2) +
                                      Mathf.Pow((float)(outputs[1] - calcOutputs[1]), 2))) / 2.0f;
                    }

                    //Add this to cumulative SSE for the epoch
                    sse += thisError;
                }

                //Percentage training to display on screen
                trainingProgress = (float)i / (float)epochs;

                //Average SSE over the epoch
                sse /= lineCount;

                //If sse isn't better then reload previous set of weights and
                //decrease alpha; this adaptive training lets the ANN move out
                //of local optima and hence find the global optimum.
                if (lastSSE < sse)
                {
                    ann.LoadWeights(currentWeights);
                    ann.alpha = Mathf.Clamp((float)ann.alpha - 0.001f, 0.01f, 0.9f);
                }
                else //increase alpha and remember the improved SSE
                {
                    ann.alpha = Mathf.Clamp((float)ann.alpha + 0.001f, 0.01f, 0.9f);
                    lastSSE = sse;
                }

                yield return(null); //Allow OnGUI some time to update on-screen values
            }
        }
    }

    //Training done, save weights
    trainingDone = true;
    SaveWeightsToFile();
}