private void TrainANN()
    {
        for (int i = replayMemory.Count - 1; i >= 0; i--)
        {
            List <double> toutputsOld = new List <double>();
            List <double> toutputsNew = new List <double>();
            toutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));

            double maxQOld = toutputsOld.Max();
            int    action  = toutputsOld.ToList().IndexOf(maxQOld);

            double feedback;
            if (i == replayMemory.Count - 1)
            {
                feedback = replayMemory[i].reward;
            }
            else
            {
                toutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                double maxQ = toutputsNew.Max();
                feedback = (replayMemory[i].reward +
                            discount * maxQ);
            }

            toutputsOld[action] = feedback;
            ann.Train(replayMemory[i].states, toutputsOld);
        }
        replayMemory.Clear();
    }
    List <double> Run(double bx, double by, double bvx, double bvy, double px, double py, double pv, bool train)
    {
        List <double> inputs  = new List <double>();
        List <double> outputs = new List <double>();

        //Populate the inputs
        inputs.Add(bx);
        inputs.Add(by);
        inputs.Add(bvx);
        inputs.Add(bvy);
        inputs.Add(px);
        inputs.Add(py);

        //We put pv in the outputs so that we can train against it
        outputs.Add(pv);

        if (train)
        {
            return(ann.Train(inputs, outputs));
        }
        else
        {
            return(ann.CalcOutput(inputs, outputs));
        }
    }
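TrainANN above relies on a SoftMax helper that is not included in these excerpts. A minimal sketch of what such a helper typically looks like, inferred from the call sites rather than taken from the original implementation: exponentiate each value (shifted by the maximum for numerical stability) and normalize so the outputs sum to 1.

    // Requires: using System.Collections.Generic; using System.Linq;
    List <double> SoftMax(List <double> values)
    {
        double max = values.Max();
        List <double> exps = values.Select(v => System.Math.Exp(v - max)).ToList();
        double sum = exps.Sum();
        return exps.Select(e => e / sum).ToList();
    }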
Example #3
    private List <double> Run(
        double ballXPosition,
        double ballYPosition,
        double ballXVelocity,
        double ballYVelocity,
        double paddleXPosition,
        double paddleYPosition,
        double paddleVelocity,
        bool train)
    {
        var inputs = new List <double>
        {
            ballXPosition,
            ballYPosition,
            ballXVelocity,
            ballYVelocity,
            paddleXPosition,
            paddleYPosition
        };

        var outputs = new List <double>
        {
            paddleVelocity
        };

        if (train)
        {
            return(_ann.Train(inputs, outputs));
        }
        else
        {
            return(_ann.CalcOutput(inputs, outputs));
        }
    }
Example #4
    List <double> Run(double bx,
                      double by,
                      double bvx,
                      double bvy,
                      double px,
                      double py,
                      double pv,
                      bool train)
    {
        List <double> inputs  = new List <double>();
        List <double> outputs = new List <double>();

        inputs.Add(bx);
        inputs.Add(by);
        inputs.Add(bvx);
        inputs.Add(bvy);
        inputs.Add(px);
        inputs.Add(py);
        outputs.Add(pv);

        if (train)
        {
            return(ann.Train(inputs, outputs));
        }
        else
        {
            return(ann.CalculateOutput(inputs, outputs));
        }
    }
Example #5
    // Q-learning itself
    private void TrainFromMemories()
    {
        for (int i = replayMemory.Count - 1; i >= 0; i--)
        {
            List <double> outputsOld;
            List <double> outputsNew;

            outputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));

            double maxQOld = outputsOld.Max();
            int    action  = outputsOld.ToList().IndexOf(maxQOld);

            double feedback;
            if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
            {
                feedback = replayMemory[i].reward;
            }
            else
            {
                outputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                double maxQ = outputsNew.Max();
                //Bellman's equation
                feedback = replayMemory[i].reward + discount * maxQ;
            }

            outputsOld[action] = feedback;
            ann.Train(replayMemory[i].states, outputsOld);
        }
    }
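The feedback computed above is the standard Q-learning target. For a non-terminal memory it follows the Bellman equation, where \gamma plays the role of the discount field used in the code:

    Q(s_t, a_t) \leftarrow r_t + \gamma \max_{a'} Q(s_{t+1}, a')

For the last memory in the list (or one whose reward marks a terminal state) no next state exists, so the target is simply r_t.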
Example #6
    IEnumerator LoadTrainingSet()
    {
        string path = Application.dataPath + "/trainingData.txt";
        string line;

        if (File.Exists(path))
        {
            int           lineCount   = File.ReadAllLines(path).Length;
            StreamReader  tdf         = File.OpenText(path);
            List <double> calcOutputs = new List <double>();
            List <double> inputs      = new List <double>();
            List <double> outputs     = new List <double>();

            for (int i = 0; i < epochs; i++)
            {
                sse = 0;
                tdf.BaseStream.Position = 0;
                string currentWeights = ann.PrintWeights();
                while ((line = tdf.ReadLine()) != null)
                {
                    string[] data      = line.Split(',');
                    float    thisError = 0;
                    if (System.Convert.ToDouble(data[5]) != 0 && System.Convert.ToDouble(data[6]) != 0)
                    {
                        inputs.Clear();
                        outputs.Clear();
                        inputs.Add(System.Convert.ToDouble(data[0]));
                        inputs.Add(System.Convert.ToDouble(data[1]));
                        inputs.Add(System.Convert.ToDouble(data[2]));
                        inputs.Add(System.Convert.ToDouble(data[3]));
                        inputs.Add(System.Convert.ToDouble(data[4]));

                        double o1 = Map(0, 1, -1, 1, System.Convert.ToSingle(data[5]));
                        outputs.Add(o1);
                        double o2 = Map(0, 1, -1, 1, System.Convert.ToSingle(data[6]));
                        outputs.Add(o2);

                        calcOutputs = ann.Train(inputs, outputs);
                        thisError   = ((Mathf.Pow((float)(outputs[0] - calcOutputs[0]), 2) +
                                        Mathf.Pow((float)(outputs[1] - calcOutputs[1]), 2))) / 2.0f;
                    }
                    sse += thisError;
                }
                trainingProgress = (float)i / (float)epochs;
                sse /= lineCount;
                if (lastSSE < sse)
                {
                    ann.LoadWeights(currentWeights);
                    ann.alpha = Mathf.Clamp((float)ann.alpha - 0.001f, 0.01f, 0.9f);
                }
                else
                {
                    ann.alpha = Mathf.Clamp((float)ann.alpha + 0.001f, 0.01f, 0.9f);
                    lastSSE   = sse;
                }
                yield return(null);
            }
        }
        trainingDone = true;
    }
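LoadTrainingSet above calls a Map helper that is not shown. Judging from the call Map(0, 1, -1, 1, value) and the comment elsewhere about mapping labels into the (0, 1) range, it most likely remaps a value from one range to another; a hedged sketch, with the parameter order assumed from the call sites:

    // Hypothetical range-remapping helper: moves value from
    // [origFrom, origTo] into [newFrom, newTo]. Uses UnityEngine.Mathf.
    float Map(float newFrom, float newTo, float origFrom, float origTo, float value)
    {
        float t = Mathf.InverseLerp(origFrom, origTo, value);
        return Mathf.Lerp(newFrom, newTo, t);
    }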
Example #7
    private void TrainAfterDead()
    {
        for (int i = replayMemory.Count - 1; i >= 0; i--)
        {
            List <double> toutputsOld = new List <double>();               // Q Values with current memory
            List <double> toutputsNew = new List <double>();               // Q Values for the next memory
            toutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states)); // Old/current values

            double maxQOld = toutputsOld.Max();                            // max q value of the old/current memory
            int    action  = toutputsOld.ToList().IndexOf(maxQOld);        // best action according to that

            double feedback;
            if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
            {
                feedback = replayMemory[i].reward; // if we're at the end, there is no next memory
            }
            else
            {
                toutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states)); // calculate q values for the next memory
                double maxQ = toutputsNew.Max();                                   // max q values from the next state
                // bellman equation
                feedback = (replayMemory[i].reward +
                            discount * maxQ); // current reward + discount * maxQ
                // this feedback becomes the training target for the ANN
            }

            toutputsOld[action] = feedback; // update the chosen action's Q value with the feedback and use it as the desired output
            ann.Train(replayMemory[i].states, toutputsOld);
        }

        Done();
        replayMemory.Clear();
        failCount++;
        episode++;
    }
Example #8
    //Run the ANN, that can train or calculate output based on a boolean
    List <double> Run(double bx, double by, double bvx, double bvy, double px, double py, double pv, bool train)
    {
        List <double> inputs  = new List <double>();
        List <double> outputs = new List <double>();

        //Add the inputs to the inputs list
        inputs.Add(bx);
        inputs.Add(by);
        inputs.Add(bvx);
        inputs.Add(bvy);
        inputs.Add(px);
        inputs.Add(py);
        //Add the expected output to the outputs list
        outputs.Add(pv);

        //Call function according to boolean flag
        if (train)
        {
            return(ann.Train(inputs, outputs));
        }
        else
        {
            return(ann.CalcOutput(inputs, outputs));
        }
    }
    List <double> Train(double i1, double i2, double o)
    {
        List <double> inputs  = new List <double>();
        List <double> outputs = new List <double>();

        inputs.Add(i1);
        inputs.Add(i2);
        outputs.Add(o);
        return(ann.Train(inputs, outputs));
    }
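A hedged usage sketch for the two-input Train wrapper above: the usual caller feeds it a small labelled dataset over and over, for example an XOR-style truth table (the dataset and caller here are illustrative, not from the original project):

    // Illustrative only: one pass over an XOR truth table.
    void TrainXorOnce()
    {
        Train(0, 0, 0);
        Train(0, 1, 1);
        Train(1, 0, 1);
        Train(1, 1, 0);
    }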
Example #10
    private List <double> Train(double i1, double i2, double o)
    {
        List <double> inputs         = new List <double>();
        List <double> desiredOutputs = new List <double>();

        inputs.Add(i1);
        inputs.Add(i2);
        desiredOutputs.Add(o);

        return(ann.Train(inputs, desiredOutputs));
    }
    private void FixedUpdate()
    {
        List <double> states = new List <double>();
        List <double> qs     = new List <double>();

        states.Add(this.transform.position.y);
        states.Add(this.GetComponent <Rigidbody2D>().velocity.y);
        qs = ann.CalcOutput(states);
        this.GetComponent <Rigidbody2D>().AddForce(Vector2.up * force * (float)qs[0]);
        if (dead)
        {
            reward = -1;
        }
        else
        {
            reward = 0.1f;
        }
        replay lastmemory = new replay(this.transform.position.y, this.GetComponent <Rigidbody2D>().velocity.y, reward);

        if (replaymemory.Count > mcapacity)
        {
            replaymemory.RemoveAt(0);
        }
        replaymemory.Add(lastmemory);
        //Training And QLearning
        if (dead)
        {
            for (int i = replaymemory.Count - 1; i >= 0; i--)
            {
                List <double> toutputs_old = new List <double>();
                List <double> toutputs_new = new List <double>();
                toutputs_old = ann.CalcOutput(replaymemory[i].states);

                double feedback;
                if (i == replaymemory.Count - 1 || replaymemory[i].reward == -1)
                {
                    feedback = replaymemory[i].reward;
                }
                else
                {
                    toutputs_new = ann.CalcOutput(replaymemory[i + 1].states);
                    double maxQ = toutputs_new[0];
                    feedback = (replaymemory[i].reward + discount * maxQ);  //BELLMAN EQUATION
                }
                toutputs_old[0] = feedback;
                ann.Train(replaymemory[i].states, toutputs_old);
            }
            dead = false;
            Reset();
            replaymemory.Clear();
        }
    }
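The FixedUpdate above (and the replay-based trainers earlier) assume a small replay record that stores the state values and the reward for one physics step. A hedged sketch, with fields inferred from replaymemory[i].states and replaymemory[i].reward; the real class in each project may differ, and other examples store three or more state values:

    // Hypothetical replay entry for this two-state example.
    public class replay
    {
        public List <double> states;
        public double reward;

        public replay(double yPos, double yVel, double r)
        {
            states = new List <double> { yPos, yVel };
            reward = r;
        }
    }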
Example #12
    // Wrapper method for training or calculating the output
    List <double> Run(double bx, double by, double bvx, double bvy, double px, double py, double pv, bool train)
    {
        List <double> inputs = new List <double>()
        {
            bx, by, bvx, bvy, px, py
        };
        List <double> outputs = new List <double>()
        {
            pv
        };

        return(train ? ann.Train(inputs, outputs) : ann.CalcOutput(inputs, outputs));
    }
    List <double> Train(double i0, double i1, double i2, double i3, double i4, double i5, double output)
    {
        List <double> inputs  = new List <double>();
        List <double> outputs = new List <double>();

        inputs.Add(i0);
        inputs.Add(i1);
        inputs.Add(i2);
        inputs.Add(i3);
        inputs.Add(i4);
        inputs.Add(i5);

        outputs.Add(output);
        return(ann.Train(inputs, outputs));
    }
    private IEnumerator LoadTrainingSet()
    {
        string path = Application.dataPath + "/CarRacing/trainingData.txt";
        string line;

        if (File.Exists(path))
        {
            int           lineCount    = File.ReadAllLines(path).Length;
            StreamReader  streamReader = File.OpenText(path);
            List <double> inputs       = new List <double>();
            List <double> outputs      = new List <double>();

            for (int i = 0; i < epochs; i++)
            {
                sse = 0;
                streamReader.BaseStream.Position = 0;
                string currentWeights = ann.PrintWeights();
                while ((line = streamReader.ReadLine()) != null)
                {
                    string[] data = line.Split(',');
                    //if nothing to be learned ignore this line
                    float thisError = 0;
                    if (System.Convert.ToDouble(data[5]) != 0 && System.Convert.ToDouble(data[6]) != 0)
                    {
                        inputs.Clear();
                        outputs.Clear();
                        FillInputs(inputs, data);
                        FillOutputs(outputs, data);

                        List <double> calcOutputs = ann.Train(inputs, outputs);
                        thisError = ((Mathf.Pow((float)(outputs[0] - calcOutputs[0]), 2) +
                                      Mathf.Pow((float)(outputs[1] - calcOutputs[1]), 2))) / 2.0f;
                    }
                    sse += thisError;
                }
                trainingProgress = (float)i / (float)epochs;
                sse /= lineCount;

                CorrectTraining(currentWeights);

                yield return(null);
            }
        }
        finishedTraining = true;
        SaveWeightsToFile();
    }
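CorrectTraining is not shown in this excerpt, but the inline version of the same idea in Example #6 above (and Example #30 below) suggests what it does: when the epoch's SSE got worse, roll the weights back and lower the learning rate; otherwise raise the learning rate and remember the new best SSE. A hedged sketch, assuming sse, lastSSE and ann are fields of the surrounding class:

    // Assumed shape of CorrectTraining, mirroring the inline logic elsewhere.
    private void CorrectTraining(string currentWeights)
    {
        if (lastSSE < sse)
        {
            ann.LoadWeights(currentWeights);
            ann.alpha = Mathf.Clamp((float)ann.alpha - 0.001f, 0.01f, 0.9f);
        }
        else
        {
            ann.alpha = Mathf.Clamp((float)ann.alpha + 0.001f, 0.01f, 0.9f);
            lastSSE = sse;
        }
    }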
Example #15
    List <double> Train(int input1, int input2, int desiredOutput, bool updateWeights = true)
    {
        List <double> inputs  = new List <double>();
        List <double> outputs = new List <double>();

        inputs.Add(input1);
        inputs.Add(input2);
        outputs.Add(desiredOutput);
        if (updateWeights)
        {
            return(ann.Train(inputs, outputs));
        }
        else
        {
            return(ann.CalcOutput(inputs, outputs));
        }
    }
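A hedged usage sketch for the updateWeights flag above: it lets a caller evaluate the network on the same pair without modifying the weights, for example to inspect progress during training (the calling code is illustrative, not from the original project):

    // Illustrative only; Debug.Log is UnityEngine's logger.
    void LogProgress()
    {
        List <double> afterTraining = Train(1, 0, 1);        // updates the weights
        List <double> readOnly      = Train(1, 0, 1, false); // forward pass only
        Debug.Log("output after training: " + readOnly[0]);
    }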
Example #16
    private void Learn()
    {
        int layerMask = 1 << 8;

        RaycastHit2D hit       = Physics2D.Raycast(ball.transform.position, ballRb.velocity, 1000);
        GameState    gameState = new GameState(ball.position, ballRb.velocity, paddle.position, hit.point);

        //desired movement is from the current position to the position the ball will reach
        float desiredDelta = hit.point.y - paddle.position.y;

        List <double> inputs = gameState.GetInputs();
        List <double> output = ann.Train(inputs, new List <double> {
            desiredDelta
        });

        yvel = (float)output[0] * Time.deltaTime * paddleSpeed;
        print("Y vel = " + yvel);
    }
    List <double> Run(double bx, double by, double bvx, double bvy, double px, double py, double pv, bool train)
    {
        List <double> inputs  = new List <double>();
        List <double> outputs = new List <double>();

        inputs.Add(bx);  //ball x pos
        inputs.Add(by);  //ball y pos
        inputs.Add(bvx); //ball velocity x
        inputs.Add(bvy); //ball velocity y
        inputs.Add(px);  //paddle x pos
        inputs.Add(py);  //paddle y pos
        outputs.Add(pv); //paddle y velocity
        if (train)
        {
            return(ann.Train(inputs, outputs));
        }
        else
        {
            return(ann.CalcOutput(inputs, outputs));
        }
    }
Example #18
    public List <double> Run(double bxp, double byp, double pxp, double pyp, double bvx, double bvy, double pvy, bool train)
    {
        List <double> inputs  = new List <double>();
        List <double> outputs = new List <double>();

        inputs.Add(bxp);
        inputs.Add(byp);
        inputs.Add(pxp);
        inputs.Add(pyp);
        inputs.Add(bvx);
        inputs.Add(bvy);
        outputs.Add(pvy);
        if (train)
        {
            return(ann.Train(inputs, outputs));
        }
        else
        {
            return(ann.CalcOutput(inputs, outputs));
        }
    }
    List <double> Run(double ballX, double ballY, double ballVelX, double ballVelY, double paddleX, double paddleY, double paddleVel, bool train)
    {
        List <double> inputs  = new List <double>();
        List <double> outputs = new List <double>();

        inputs.Add(ballX);
        inputs.Add(ballY);
        inputs.Add(ballVelX);
        inputs.Add(ballVelY);
        inputs.Add(paddleX);
        inputs.Add(paddleY);
        outputs.Add(paddleVel);

        if (train)
        {
            return(ann.Train(inputs, outputs));
        }
        else
        {
            return(ann.CalcOutput(inputs, outputs));
        }
    }
Example #20
    List <double> Run(double ballX, double ballY, double ballVelocX, double ballVelocY, double paddleX, double paddleY, double paddleVelocity, bool train)
    {
        List <double> inputs  = new List <double>();
        List <double> outputs = new List <double>();

        inputs.Add(ballX);
        inputs.Add(ballY);
        inputs.Add(ballVelocX);
        inputs.Add(ballVelocY);
        inputs.Add(paddleX);
        inputs.Add(paddleY);
        outputs.Add(paddleVelocity);

        if (train)
        {
            return(_artificialNeuronNetwork.Train(inputs, outputs));
        }
        else
        {
            return(_artificialNeuronNetwork.CalcOutput(inputs, outputs));
        }
    }
    // METHOD - This method either does training or does calculations without affecting the training
    List <double> Run(double bx, double by, double bvx, double bvy, double px, double py, double pv, bool train)
    {
        // Six inputs and One Output
        List <double> inputs  = new List <double>();
        List <double> outputs = new List <double>();

        inputs.Add(bx);  // Ball x position
        inputs.Add(by);  // Ball y position
        inputs.Add(bvx); // Ball x velocity
        inputs.Add(bvy); // Ball y velocity
        inputs.Add(px);  // Paddle x position
        inputs.Add(py);  // Paddle y position
        outputs.Add(pv); // Paddle velocity, this is ignored when we are calculating and not training
        // If training is selected, then go ahead and perform the training
        if (train)
        {
            return(ann.Train(inputs, outputs));
        }
        else
        {
            // Otherwise, only calculate the output without affecting the training
            return(ann.CalcOutput(inputs, outputs));
        }
    }
    private void FixedUpdate()
    {
        timer += Time.deltaTime;
        List <double> states = new List <double>();
        List <double> qs     = new List <double>();

        states.Add(this.transform.rotation.x);
        states.Add(ball.transform.position.z);
        states.Add(ball.GetComponent <Rigidbody>().angularVelocity.x);

        qs = SoftMax(ann.CalcOutput(states));
        double maxQ      = qs.Max();
        int    maxQindex = qs.ToList().IndexOf(maxQ);

        explorerate = Mathf.Clamp(explorerate - exploredecay, minexplorerate, maxeplorerate);

        /* No need to explore in this case, as the environment is really small
         * if(Random.Range(0,100)<explorerate)
         * {
         *   maxQindex = Random.Range(0, 2);
         * }*/
        if (maxQindex == 0)
        {
            this.transform.Rotate(Vector3.right, tiltspeed * (float)qs[maxQindex]);
        }
        else if (maxQindex == 1)
        {
            this.transform.Rotate(Vector3.right, -tiltspeed * (float)qs[maxQindex]);
        }
        if (ball.GetComponent <BallState>().dropped)
        {
            reward = -1;
        }
        else
        {
            reward = 0.1f;
        }
        Replay lastmemory = new Replay(this.transform.rotation.x, ball.transform.position.z, ball.GetComponent <Rigidbody>().angularVelocity.x, reward);

        if (replaymemory.Count > mcapacity)
        {
            replaymemory.RemoveAt(0);
        }
        replaymemory.Add(lastmemory);

        //Training and QLEARNING

        if (ball.GetComponent <BallState>().dropped)
        {
            for (int i = replaymemory.Count - 1; i >= 0; i--)
            {
                List <double> toutputs_old = new List <double>();
                List <double> toutputs_new = new List <double>();
                toutputs_old = SoftMax(ann.CalcOutput(replaymemory[i].states));
                double maxQ_old = toutputs_old.Max();
                int    action   = toutputs_old.ToList().IndexOf(maxQ_old);

                double feedback;
                if (i == replaymemory.Count - 1 || replaymemory[i].reward == -1)
                {
                    feedback = replaymemory[i].reward;
                }
                else
                {
                    toutputs_new = SoftMax(ann.CalcOutput(replaymemory[i + 1].states));
                    maxQ         = toutputs_new.ToList().Max();
                    feedback     = (replaymemory[i].reward + discount * maxQ); //BELLMAN EQUATION
                }
                toutputs_old[action] = feedback;
                ann.Train(replaymemory[i].states, toutputs_old);
            }
            if (timer > maxBalanceTime)
            {
                maxBalanceTime = timer;
            }
            timer = 0;
            ball.GetComponent <BallState>().dropped = false;
            this.transform.rotation = Quaternion.identity;
            Reset();
            replaymemory.Clear();
            fallcount++;
        }
    }
Example #23
    void FixedUpdate()
    {
        timer += Time.deltaTime;
        List <double> states = new List <double>();
        List <double> qs     = new List <double>();

        states.Add(this.transform.rotation.x);
        states.Add(this.transform.position.z);
        states.Add(ball.GetComponent <Rigidbody>().angularVelocity.x); // reflection not working in VCS

        qs = SoftMax(ann.CalcOutput(states));                          // why a softmax?
        double maxQ      = qs.Max();                                   // cost: O(L), where L is length of the list
        int    maxQIndex = qs.ToList().IndexOf(maxQ);                  // cost is O(L)

        // in my opinion, exploreRate should decrease after each fail and not after each fixedUpdate
        exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);

        // Udemy: removing these lines will accelerate convergence
        // more exploration early on, and less later on
        if (Random.Range(0, 100) < exploreRate)
        {
            maxQIndex = Random.Range(0, 2);              // choose either 0 or 1
        }
        if (maxQIndex == 0)
        {
            // public void Rotate(Vector3 eulerAngles, Space relativeTo = Space.Self);
            // public void Rotate(Vector3 axis, float angle, Space relativeTo = Space.Self);
            this.transform.Rotate(Vector3.right, tiltSpeed * (float)qs[maxQIndex]);
        }
        else if (maxQIndex == 1)
        {
            this.transform.Rotate(Vector3.right, -tiltSpeed * (float)qs[maxQIndex]);
        }

        if (ball.GetComponent <BallState>().dropped)
        {
            reward = -1.0f;
        }
        else
        {
            reward = 0.1f;               // [0.1f]
        }
        Replay lastMemory = new Replay(this.transform.rotation.x,
                                       ball.transform.position.z,
                                       ball.GetComponent <Rigidbody>().angularVelocity.x,
                                       reward);

        if (replayMemory.Count > mCapacity)
        {
            replayMemory.RemoveAt(0);
        }

        replayMemory.Add(lastMemory);

        if (ball.GetComponent <BallState>().dropped)
        {
            for (int i = replayMemory.Count - 1; i >= 0; i--)
            {
                List <double> toutputsOld = new List <double>();
                List <double> toutputsNew = new List <double>();
                toutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));                  // why a softmax?

                double maxQOld = toutputsOld.Max();
                int    action  = toutputsOld.ToList().IndexOf(maxQOld);

                double feedback;
                if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
                {
                    feedback = replayMemory[i].reward;
                }
                else
                {
                    toutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                    maxQ        = toutputsNew.Max();
                    feedback    = (replayMemory[i].reward +
                                   discount * maxQ);
                }

                toutputsOld[action] = feedback;
                ann.Train(replayMemory[i].states, toutputsOld);
            }

            if (timer > maxBalanceTime)
            {
                maxBalanceTime = timer;
            }

            timer = 0;

            ball.GetComponent <BallState>().dropped = false;
            this.transform.rotation = Quaternion.identity;
            ResetBall();
            replayMemory.Clear();
            failCount++;
        }
    }
Example #24
    void FixedUpdate()
    {
        frames++;
        // seeGround = true;
        // isOnGround = Physics2D.OverlapCircle(groundCheck.position, groundCheckRadius, whatIsGround);
        Debug.DrawRay(theEyes.transform.position, theEyes.transform.right * 20, Color.green);
        RaycastHit2D hit = Physics2D.Raycast(theEyes.transform.position, theEyes.transform.right * 20);

        if (hit && hit.collider.tag == "Killbox")
        {
            seeGround = false;
            Debug.DrawRay(theEyes.transform.position, theEyes.transform.right * 20, Color.red);
        }
        // double[] distancesFromObjects = new double[platforms.Length];
        // for(int i = 0; i < platforms.Length; i++)
        // {
        //  Vector3 heading = transform.position - platforms[i].transform.position;
        //  distancesFromObjects[i] = heading.magnitude;
        // }
        // // second closest, to be honest
        // System.Array.Sort(distancesFromObjects);
        // double closestPlatform = distancesFromObjects[1];
        // int indexOfClosest = distancesFromObjects.ToList().IndexOf(closestPlatform);
        // Vector3 closestPoint = platforms[indexOfClosest].transform.position;

        timer += Time.deltaTime;
        List <double> states = new List <double>();
        List <double> qs     = new List <double>();

        GameObject[] platforms    = GameObject.FindGameObjectsWithTag("platform");
        Vector3      bestPoint    = GetClosestEnemy(platforms);
        Vector3      closestPoint = GetClosestGap(platforms);

        Vector3 directionToNextPlatform = bestPoint - transform.position;
        Vector3 directionToNextGap      = closestPoint - transform.position;

        // states.Add(transform.position.y);
        // states.Add(rb.velocity.y);
        states.Add(directionToNextPlatform.x);
        // states.Add(directionToNextPlatform.y);
        states.Add(directionToNextGap.x);
        // Debug.Log(directionToNextGap.x);

        qs = SoftMax(ann.CalcOutput(states));
        double maxQ      = qs.Max();
        int    maxQIndex = qs.ToList().IndexOf(maxQ);

        exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);

        if (Random.Range(0, 100) < exploreRate)
        {
            maxQIndex = Random.Range(0, 2);
        }

        if (maxQIndex == 1)
        {
            sumOfJumps++;
        }

        if (maxQIndex == 0)
        {
            sumOfStays++;
        }

        if (frames % 8 == 0)
        {
            if (sumOfJumps > sumOfStays)
            {
                robotAccess.RobotJump();
            }
            sumOfStays = 0;
            sumOfJumps = 0;
            frames     = 0;
        }

        if (rb.velocity.x < 0.5)
        {
            robotAccess.RobotJump();
        }

        if (hitObstacle)
        {
            reward = -5.0f;
        }
        else
        {
            reward = 0.1f;
        }



        Replay lastMemory = new Replay(
            // transform.position.y,
            // rb.velocity.y,
            directionToNextPlatform.x,
            // directionToNextPlatform.y,
            directionToNextGap.x,
            reward);

        if (replayMemory.Count > mCapacity)
        {
            replayMemory.RemoveAt(0);
        }

        replayMemory.Add(lastMemory);

        if (hitObstacle)
        {
            for (int i = replayMemory.Count - 1; i >= 0; i--)
            {
                List <double> tOutputsOld = new List <double>();
                List <double> tOutputsNew = new List <double>();
                tOutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));

                double maxQOld = tOutputsOld.Max();
                int    action  = tOutputsOld.ToList().IndexOf(maxQOld);

                double feedback;
                if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
                {
                    feedback = replayMemory[i].reward;
                }
                else
                {
                    tOutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                    maxQ        = tOutputsNew.Max();
                    feedback    = (replayMemory[i].reward + discount * maxQ);
                }

                tOutputsOld[action] = feedback;
                ann.Train(replayMemory[i].states, tOutputsOld);
            }
            if (timer > maxBalanceTime)
            {
                maxBalanceTime = timer;
            }

            timer = 0;

            hitObstacle = false;
            theGameManager.Reset();
            replayMemory.Clear();
            failCount++;
        }
    }
    /// <summary>
    /// Loads the training dataset.
    /// </summary>
    /// <returns> null (Coroutine). </returns>
    private IEnumerator LoadTrainingSet()
    {
        string dataSetFilePath = Application.dataPath + dataSetFolder + dataSetFileName;

        // An instance of training (row in the dataSet).
        string instance;

        if (File.Exists(dataSetFilePath))
        {
            Debug.Log("Using training data file found at: " + dataSetFilePath);

            int          instanceCount     = File.ReadAllLines(dataSetFilePath).Length;
            StreamReader dataSetFile       = File.OpenText(dataSetFilePath);
            var          calculatedOutputs = new List <double>();
            var          inputs            = new List <double>();
            var          desiredOutputs    = new List <double>();

            for (var i = 0; i < epochs; i++)
            {
                sumSquaredError = 0;

                // Set file pointer to beginning of file.
                dataSetFile.BaseStream.Position = 0;

                string currentWeights = ann.PrintWeights();

                // Read one instance (line) at a time until the end of the dataSet.
                while ((instance = dataSetFile.ReadLine()) != null)
                {
                    // Separate each feature (column) of the current instance (row).
                    string[] features = instance.Split(',');

                    // The error we get from a particular instance.
                    // If nothing to be learned, ignore this line.
                    float thisError = 0;

                    // Ignore instances, where no user input was recorded.
                    // They provide no useful information.
                    // TODO: Fix floating point number comparison!?
                    if (System.Convert.ToDouble(features[5]) != 0 &&
                        System.Convert.ToDouble(features[6]) != 0)
                    {
                        inputs.Clear();
                        desiredOutputs.Clear();

                        // TODO: Check that training data and inputs are calculated the same way (rounding, normalizing, etc.).
                        // Assign the first five features (raycast distances) to inputs.
                        for (int j = 0; j < 5; j++)
                        {
                            inputs.Add(System.Convert.ToDouble(features[j]));
                        }

                        // Assigns the remaining two features (user input) to outputs.
                        for (int j = 5; j < 7; j++)
                        {
                            double output = Helpers.Map(0, 1, -1, 1, System.Convert.ToSingle(features[j]));
                            desiredOutputs.Add(output);
                        }

                        // Train the Neural Network.
                        calculatedOutputs = ann.Train(inputs, desiredOutputs);

                        // Calculate individual squaredErrors.
                        float output0ErrorSquared = Mathf.Pow((float)(desiredOutputs[0] - calculatedOutputs[0]), 2);
                        float output1ErrorSquared = Mathf.Pow((float)(desiredOutputs[1] - calculatedOutputs[1]), 2);
                        // Calculate averaged sum of squared errors.
                        thisError = (output0ErrorSquared + output1ErrorSquared) / 2f;
                    }
                    sumSquaredError += thisError;
                }
                // Percentage value.
                trainingProgress = (float)i / (float)epochs;

                // Calculate average sumOfSquaredErrors.
                sumSquaredError /= instanceCount;

                AdaptLearning(currentWeights);

                yield return(null);
            }
        }
        else
        {
            Debug.LogError("No training data file found at: " + dataSetFilePath);
        }
        trainingDone = true;

        if (!loadWeightsFromFile)
        {
            SaveWeightsToFile();
        }
    }
Example #26
    void FixedUpdate()
    {
        timer += Time.deltaTime;
        List <double> states = new List <double>();
        List <double> qs     = new List <double>();

        RaycastHit hit;

        float fDist = visibleDistance, rDist = visibleDistance, lDist = visibleDistance, r45Dist = visibleDistance, l45Dist = visibleDistance;



        if (Physics.Raycast(transform.position, this.transform.forward, out hit, visibleDistance, terrainLayer))
        {
            fDist = Vector3.Distance(transform.position, hit.point);
        }

        if (Physics.Raycast(transform.position, this.transform.right, out hit, visibleDistance, terrainLayer))
        {
            rDist = Vector3.Distance(transform.position, hit.point);
        }

        if (Physics.Raycast(transform.position, -this.transform.right, out hit, visibleDistance, terrainLayer))
        {
            lDist = Vector3.Distance(transform.position, hit.point);
        }

        if (Physics.Raycast(transform.position, Quaternion.AngleAxis(-45, Vector3.up) * this.transform.right, out hit, visibleDistance, terrainLayer))
        {
            r45Dist = Vector3.Distance(transform.position, hit.point);
        }

        if (Physics.Raycast(transform.position, Quaternion.AngleAxis(45, Vector3.up) * -this.transform.right, out hit, visibleDistance, terrainLayer))
        {
            l45Dist = hit.distance;
        }

        // Debug.Log("Frontal: " + fDist + ", Derecha: " + rDist + ", Izquierda: " + lDist + ", Derecha45: " + r45Dist + ", Izquierda45: " + l45Dist);

        states.Add(fDist);
        states.Add(rDist);
        states.Add(lDist);
        states.Add(r45Dist);
        states.Add(l45Dist);

        qs = SoftMax(ann.CalcOutput(states));
        double maxQ      = qs.Max();
        int    maxQIndex = qs.ToList().IndexOf(maxQ);

        //exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);


        //if(Random.Range(0,100) < exploreRate)
        //	maxQIndex = Random.Range(0,2);


        float translation = speed * Time.deltaTime;

        this.transform.Translate(0, 0, translation);



        if (maxQIndex == 0)
        {
            this.transform.Rotate(Vector3.up, tiltSpeed * (float)qs[maxQIndex]);
        }
        else if (maxQIndex == 1)
        {
            this.transform.Rotate(Vector3.up, -tiltSpeed * (float)qs[maxQIndex]);
        }


        if (ball.GetComponent <BallState>().dropped)
        {
            reward = -1.0f;
            //reward = 0;
        }

        else if (ball.GetComponent <BallState>().point)
        {
            reward = 0.5f;
        }
        else
        {
            reward = 0.1f;// + 0.01f;
        }
        Replay lastMemory = new Replay(fDist, rDist, lDist, r45Dist, l45Dist,
                                       reward);

        if (replayMemory.Count > mCapacity)
        {
            replayMemory.RemoveAt(0);
        }

        replayMemory.Add(lastMemory);

        if (ball.GetComponent <BallState>().dropped)
        {
            ResetBall();    // So it doesn't get stuck when there is no weights file.

            for (int i = replayMemory.Count - 1; i >= 0; i--)
            {
                List <double> toutputsOld = new List <double>();
                List <double> toutputsNew = new List <double>();
                toutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));

                double maxQOld = toutputsOld.Max();
                int    action  = toutputsOld.ToList().IndexOf(maxQOld);

                double feedback;
                if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
                {
                    feedback = replayMemory[i].reward;
                }
                else
                {
                    toutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                    maxQ        = toutputsNew.Max();
                    feedback    = (replayMemory[i].reward +
                                   discount * maxQ);
                }

                toutputsOld[action] = feedback;
                ann.Train(replayMemory[i].states, toutputsOld);
            }



            timer = 0;

            ball.GetComponent <BallState>().dropped = false;
            this.transform.rotation = Quaternion.identity;
            ResetBall();
            replayMemory.Clear();
            failCount++;
            if (_isAleatoryCircuit)
            {
                if (failCount == 1000)
                {
                    flowchart.ExecuteBlock("LOSE");
                }
                //Debug.Log( "Fails: " + failCount);
                onFail?.Invoke(failCount);
            }

            //reward = 0;/////////////////////////////////
        }

        if (ball.GetComponent <BallState>().meta)
        {
            ball.GetComponent <BallState>().meta = false;
            //string pesos = PlayerPrefs.GetString("Weights");
            string pesos = ann.PrintWeights();



            if (_isAleatoryCircuit)
            {
                if (!WIN && failCount <= 1000)
                {
                    if (flowchart != null)
                    {
                        flowchart.ExecuteBlock("WIN");
                    }
                    WIN = true;
                }

                /*List<string> saveFileContent = new List<string>();
                 * saveFileContent.Add(currentAleatoryCircuitName);
                 * saveFileContent.Add(pesos);
                 * SaveAndLoad.Save(saveFileContent, currentAleatoryCircuitName + ".txt");*/

                manager.SaveNewDataDictionary(currentAleatoryCircuitName, pesos);
                Debug.Log(currentAleatoryCircuitName);
            }
            else
            {
                managerCircuits.SaveNewDataDictionary(circuitName, pesos);
            }

            /*
             * if (!_isAleatoryCircuit && maxBalanceTime <= 0)
             * {
             *  pesos = ann.PrintWeights();
             *  SaveAndLoad.Save(pesos, CIRCUITO1);
             * }
             * else if(!_isAleatoryCircuit && maxBalanceTime > timer)
             * {
             *  pesos = ann.PrintWeights();
             *  SaveAndLoad.Save(pesos, CIRCUITO1);
             * }*/
            maxBalanceTime = timer;
            Debug.Log(maxBalanceTime);
            timer = 0;
        }
    }
Example #27
    private void FixedUpdate()
    {
        timer += Time.deltaTime;
        List <double> states = new List <double>();
        List <double> qs     = new List <double>();

        states.Add(this.transform.rotation.x);
        states.Add(ball.transform.position.z);
        states.Add(ball.GetComponent <Rigidbody>().angularVelocity.x);

        qs = SoftMax(ann.CalcOutput(states));
        double maxQ      = qs.Max();
        int    maxQIndex = qs.ToList().IndexOf(maxQ);

        exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);

        if (Random.Range(0, 10000) < exploreRate)
        {
            maxQIndex = Random.Range(0, 2);
        }

        if (maxQIndex == 0)
        {
            this.transform.Rotate(Vector3.right, tiltSpeed * (float)qs[maxQIndex]);
        }
        else if (maxQIndex == 1)
        {
            this.transform.Rotate(Vector3.right, -tiltSpeed * (float)qs[maxQIndex]);
        }

        if (ball.GetComponent <BallState>().dropped)
        {
            reward = -5.0f;
        }
        else
        {
            reward = 0.1f;
        }

        Replay lastMemory = new Replay(this.transform.rotation.x,
                                       ball.transform.position.z,
                                       ball.GetComponent <Rigidbody>().angularVelocity.x,
                                       reward);

        if (replayMemory.Count > mCapacity)
        {
            replayMemory.RemoveAt(0);
        }

        replayMemory.Add(lastMemory);

        if (ball.GetComponent <BallState>().dropped)
        {
            for (int i = replayMemory.Count - 1; i >= 0; i--)
            {
                List <double> toutputOld = new List <double>();
                List <double> toutputNew = new List <double>();
                toutputOld = SoftMax(ann.CalcOutput(replayMemory[i].states));

                double maxQOld = toutputOld.Max();
                int    action  = toutputOld.ToList().IndexOf(maxQOld);

                double feedback;
                if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
                {
                    feedback = replayMemory[i].reward;
                }
                else
                {
                    toutputNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                    maxQ       = toutputNew.Max();
                    feedback   = (replayMemory[i].reward + discount * maxQ); // Bellman update: reward plus discounted max future Q
                }

                toutputOld[action] = feedback;
                ann.Train(replayMemory[i].states, toutputOld);
            }

            if (timer > maxBalanceTime)
            {
                maxBalanceTime = timer;
            }

            timer = 0;

            ball.GetComponent <BallState>().dropped = false;
            this.transform.rotation = Quaternion.identity;
            ResetBall();
            replayMemory.Clear();
            failCount++;
        }
    }
Example #28
    private void FixedUpdate()
    {
        timer += Time.deltaTime;
        List <double> states = new List <double>();
        List <double> qs     = new List <double>();

        states.Add(this.transform.rotation.x);
        states.Add(this.transform.rotation.z);
        states.Add(this.transform.position.z);
        states.Add(ball.GetComponent <Rigidbody>().angularVelocity.x);
        states.Add(ball.GetComponent <Rigidbody>().angularVelocity.z);

        qs = ANN.SoftMax(ann.CalculateOutput(states));
        double maxQ      = qs.Max();
        int    maxQIndex = qs.ToList().IndexOf(maxQ);

        exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);

        //check to see if we choose a random action
        if (UnityEngine.Random.Range(1, 100) < exploreRate)
        {
            maxQIndex = UnityEngine.Random.Range(0, 4);
        }

        //action 0 tilt right
        //action 1 tilt left
        //action 2 tilt forward
        //action 3 tilt backward
        //maxQIndex == 0 means action 0
        if (maxQIndex == 0)
        {
            this.transform.Rotate(Vector3.right, tiltSpeed * (float)qs[maxQIndex]);
        }
        else if (maxQIndex == 1)
        {
            this.transform.Rotate(Vector3.right, -tiltSpeed * (float)qs[maxQIndex]);
        }
        else if (maxQIndex == 2)
        {
            this.transform.Rotate(Vector3.forward, tiltSpeed * (float)qs[maxQIndex]);
        }
        else if (maxQIndex == 3)
        {
            this.transform.Rotate(Vector3.forward, -tiltSpeed * (float)qs[maxQIndex]);
        }

        if (ball.GetComponent <BallState>().dropped)
        {
            reward = -1f;
        }
        else
        {
            reward = 0.1f;
        }

        Replay lastMemory = new Replay(this.transform.rotation.x,
                                       this.transform.rotation.z,
                                       ball.transform.position.z,
                                       ball.GetComponent <Rigidbody>().angularVelocity.x,
                                       ball.GetComponent <Rigidbody>().angularVelocity.z,
                                       reward);

        if (replayMemory.Count > mCapacity)
        {
            replayMemory.RemoveAt(0);
        }

        replayMemory.Add(lastMemory);

        //Q-learning starts here
        //up to this point all we did was gather the inputs, get the result from the ANN,
        //assign a reward accordingly, and store the memory.
        if (ball.GetComponent <BallState>().dropped)
        {
            //loop backwards so the quality of the last memory gets carried
            //back up through the list and its blame can be attributed across
            //the earlier memories
            for (int i = replayMemory.Count - 1; i >= 0; --i)
            {
                //for each memory we run the ANN:
                //first find the Q values of the current memory
                List <double> currentMemoryQValues = new List <double>();
                //then we take the q values of the next memory
                List <double> nextMemoryQValues = new List <double>();
                currentMemoryQValues = ANN.SoftMax(ann.CalculateOutput(replayMemory[i].states));

                //find the maximum Q value of the current memories
                double maxQOld = currentMemoryQValues.Max();
                //which action gave that q value
                int action = currentMemoryQValues.ToList().IndexOf(maxQOld);

                double feedback;
                //check whether the current memory is the last memory,
                //or whether its reward is -1; a reward of -1 means the ball was dropped,
                //so every memory after this one is meaningless because it marks the end of the
                //memory sequence
                if ((i == replayMemory.Count - 1) || (replayMemory[i].reward == -1f))
                {
                    feedback = replayMemory[i].reward;
                }
                else
                {
                    nextMemoryQValues = ANN.SoftMax(ann.CalculateOutput(replayMemory[i + 1].states));
                    maxQ     = nextMemoryQValues.Max();
                    feedback = (replayMemory[i].reward + discount * maxQ);
                }

                //adding the correct reward (Q value) to the current action
                currentMemoryQValues[action] = feedback;
                //using the feedback to train the ANN
                ann.Train(replayMemory[i].states, currentMemoryQValues);
            }

            if (timer > maxBalanceTime)
            {
                maxBalanceTime = timer;
            }

            timer = 0;

            ball.GetComponent <BallState>().dropped = false;
            this.transform.rotation = Quaternion.identity;
            ResetBall();
            replayMemory.Clear();
            failCount++;
        }
    }
Example #29
    void FixedUpdate()
    {
        timer += Time.deltaTime;
        List <double> states = new List <double>();
        List <double> qs     = new List <double>();

        states.Add(Vector3.Distance(this.transform.position, topBeam.transform.position));
        states.Add(Vector3.Distance(this.transform.position, bottomBeam.transform.position));

        qs = SoftMax(ann.CalcOutput(states));
        double maxQ      = qs.Max();
        int    maxQIndex = qs.ToList().IndexOf(maxQ);

        exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);

        //if(Random.Range(0,100) < exploreRate)
        //	maxQIndex = Random.Range(0,2);

        if (maxQIndex == 0)
        {
            rb.AddForce(Vector3.up * moveForce * (float)qs[maxQIndex]);
        }
        else if (maxQIndex == 1)
        {
            rb.AddForce(Vector3.up * -moveForce * (float)qs[maxQIndex]);
        }

        if (crashed)
        {
            reward = -1.0f;
        }
        else
        {
            reward = 0.1f;
        }

        Replay lastMemory = new Replay(Vector3.Distance(this.transform.position, topBeam.transform.position),
                                       Vector3.Distance(this.transform.position, bottomBeam.transform.position),
                                       reward);

        if (replayMemory.Count > mCapacity)
        {
            replayMemory.RemoveAt(0);
        }

        replayMemory.Add(lastMemory);

        if (crashed)
        {
            for (int i = replayMemory.Count - 1; i >= 0; i--)
            {
                List <double> toutputsOld = new List <double>();
                List <double> toutputsNew = new List <double>();
                toutputsOld = SoftMax(ann.CalcOutput(replayMemory[i].states));

                double maxQOld = toutputsOld.Max();
                int    action  = toutputsOld.ToList().IndexOf(maxQOld);

                double feedback;
                if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1)
                {
                    feedback = replayMemory[i].reward;
                }
                else
                {
                    toutputsNew = SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                    maxQ        = toutputsNew.Max();
                    feedback    = (replayMemory[i].reward +
                                   discount * maxQ);
                }

                toutputsOld[action] = feedback;
                ann.Train(replayMemory[i].states, toutputsOld);
            }

            if (timer > maxBalanceTime)
            {
                maxBalanceTime = timer;
            }

            timer = 0;

            crashed = false;
            ResetBird();
            replayMemory.Clear();
            failCount++;
        }
    }
Example #30
    // Method to perform the ANN training using data collected from the player.
    IEnumerator LoadTrainingSet()
    {
        string path = Application.dataPath + "/trainingData.txt";
        string line;

        if (File.Exists(path))
        {
            int          lineCount = File.ReadAllLines(path).Length;
            StreamReader tdf       = File.OpenText(path);

            List <double> calcOutputs = new List <double>();
            List <double> inputs      = new List <double>();
            List <double> outputs     = new List <double>();

            //Loop through the epochs
            for (int i = 0; i < epochs; i++)
            {
                //set file pointer to beginning of file
                sse = 0;
                tdf.BaseStream.Position = 0;

                //Get the current weight comma separated string values from the ANN object
                string currentWeights = ann.PrintWeights();

                // Load the training data, line by line
                while ((line = tdf.ReadLine()) != null)
                {
                    string[] data = line.Split(',');
                    //if nothing to be learned ignore this line
                    float thisError = 0;

                    //Leave out rows whose training labels, y (the translation and rotation values), are zero, to reduce the data set.
                    if (System.Convert.ToDouble(data[5]) != 0 && System.Convert.ToDouble(data[6]) != 0) //If translation and rotation outputs in training data are not zero
                    {
                        //Clear out lists from previous row
                        inputs.Clear();
                        outputs.Clear();

                        //Add training input data to the inputs to the ANN
                        inputs.Add(System.Convert.ToDouble(data[0]));
                        inputs.Add(System.Convert.ToDouble(data[1]));
                        inputs.Add(System.Convert.ToDouble(data[2]));
                        inputs.Add(System.Convert.ToDouble(data[3]));
                        inputs.Add(System.Convert.ToDouble(data[4]));

                        //Map labels to range (0,1) for efficient training
                        double o1 = Map(0, 1, -1, 1, System.Convert.ToSingle(data[5]));
                        outputs.Add(o1);
                        double o2 = Map(0, 1, -1, 1, System.Convert.ToSingle(data[6]));
                        outputs.Add(o2);

                        //Calculated output (y-hat)
                        calcOutputs = ann.Train(inputs, outputs);
                        //Sum squared Error value: for both labels
                        thisError = ((Mathf.Pow((float)(outputs[0] - calcOutputs[0]), 2) +
                                      Mathf.Pow((float)(outputs[1] - calcOutputs[1]), 2))) / 2.0f;
                    }
                    //Add this to cumulative SSE for the epoch
                    sse += thisError;
                }

                //Percentage training to display on screen
                trainingProgress = (float)i / (float)epochs;

                // Average SSE
                sse /= lineCount;

                //If the SSE didn't improve, reload the previous set of weights and decrease alpha. This adaptive training lets the ANN move out
                //of local optima and move toward the global optimum.
                if (lastSSE < sse)
                {
                    ann.LoadWeights(currentWeights);
                    ann.alpha = Mathf.Clamp((float)ann.alpha - 0.001f, 0.01f, 0.9f);
                }
                else //increase alpha
                {
                    ann.alpha = Mathf.Clamp((float)ann.alpha + 0.001f, 0.01f, 0.9f);
                    lastSSE   = sse;
                }

                yield return(null); //Allow OnGUI some time to update on-screen values
            }
        }
        //Training done, save weights
        trainingDone = true;
        SaveWeightsToFile();
    }