Example #1
    private void FixedUpdate()
    {
        timer += Time.deltaTime;
        List<double> states = new List<double>();
        List<double> qs     = new List<double>();

        states.Add(this.transform.rotation.x);
        states.Add(this.transform.rotation.z);
        //use the ball's position here so it matches the state stored in the replay memory below
        states.Add(ball.transform.position.z);
        states.Add(ball.GetComponent<Rigidbody>().angularVelocity.x);
        states.Add(ball.GetComponent<Rigidbody>().angularVelocity.z);

        qs = ANN.SoftMax(ann.CalculateOutput(states));
        double maxQ      = qs.Max();
        int    maxQIndex = qs.IndexOf(maxQ);

        exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);

        //check to see if we choose a random action
        if (UnityEngine.Random.Range(1, 100) < exploreRate)
        {
            maxQIndex = UnityEngine.Random.Range(0, 4);
        }

        //action 0 tilt right
        //action 1 tilt left
        //action 2 tilt forward
        //action 3 tilt backward
        //maxQIndex == 0 means action 0
        if (maxQIndex == 0)
        {
            this.transform.Rotate(Vector3.right, tiltSpeed * (float)qs[maxQIndex]);
        }
        else if (maxQIndex == 1)
        {
            this.transform.Rotate(Vector3.right, -tiltSpeed * (float)qs[maxQIndex]);
        }
        else if (maxQIndex == 2)
        {
            this.transform.Rotate(Vector3.forward, tiltSpeed * (float)qs[maxQIndex]);
        }
        else if (maxQIndex == 3)
        {
            this.transform.Rotate(Vector3.forward, -tiltSpeed * (float)qs[maxQIndex]);
        }

        if (ball.GetComponent<BallState>().dropped)
        {
            reward = -1f;
        }
        else
        {
            reward = 0.1f;
        }

        Replay lastMemory = new Replay(this.transform.rotation.x,
                                       this.transform.rotation.z,
                                       ball.transform.position.z,
                                       ball.GetComponent<Rigidbody>().angularVelocity.x,
                                       ball.GetComponent<Rigidbody>().angularVelocity.z,
                                       reward);

        if (replayMemory.Count > mCapacity)
        {
            replayMemory.RemoveAt(0);
        }

        replayMemory.Add(lastMemory);

        //Q-learning starts here
        //up to this point all we did was gather the inputs, get the result
        //from the ANN, assign the reward accordingly, and store the memory
        if (ball.GetComponent <BallState>().dropped)
        {
            //looping backwards so the quality of the last memory gets carried
            //back up through the list, letting us attribute its blame to the
            //earlier memories
            for (int i = replayMemory.Count - 1; i >= 0; --i)
            {
                //for each memory we run the ANN:
                //first we find the Q values of the current memory
                List<double> currentMemoryQValues = ANN.SoftMax(ann.CalculateOutput(replayMemory[i].states));
                //the Q values of the next memory are filled in later, when needed
                List<double> nextMemoryQValues = new List<double>();

                //find the maximum Q value of the current memory
                double maxQOld = currentMemoryQValues.Max();
                //and which action gave that Q value
                int action = currentMemoryQValues.IndexOf(maxQOld);

                double feedback;
                //check whether the current memory is the last memory,
                //or whether its reward is -1: a reward of -1 means the ball
                //was dropped, so every memory after this one is meaningless,
                //because this is the end of the memory sequence
                if ((i == replayMemory.Count - 1) || (replayMemory[i].reward == -1f))
                {
                    feedback = replayMemory[i].reward;
                }
                else
                {
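                    //Bellman update: the feedback is this step's reward plus
                    //the discounted best Q value of the next state,
                    //feedback = r + discount * max Q(s')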
                    nextMemoryQValues = ANN.SoftMax(ann.CalculateOutput(replayMemory[i + 1].states));
                    maxQ     = nextMemoryQValues.Max();
                    feedback = (replayMemory[i].reward + discount * maxQ);
                }

                //adding the correct reward (Q value) to the current action
                currentMemoryQValues[action] = feedback;
                //using the feedback to train the ANN
                ann.Train(replayMemory[i].states, currentMemoryQValues);
            }

            if (timer > maxBalanceTime)
            {
                maxBalanceTime = timer;
            }

            timer = 0;

            ball.GetComponent <BallState>().dropped = false;
            this.transform.rotation = Quaternion.identity;
            ResetBall();
            replayMemory.Clear();
            failCount++;
        }
    }
Example #2

    private void FixedUpdate() {
        timer += Time.deltaTime;
        List<double> states = new List<double>();
        List<double> qs = new List<double>();

        states.Add(this.transform.rotation.x);
        states.Add(ball.transform.position.z);
        states.Add(ball.GetComponent<Rigidbody>().angularVelocity.x);

        qs = ann.SoftMax(ann.CalcOutput(states));
        double maxQ = qs.Max();
        Debug.Log("quality: " + maxQ);

        int maxQIndex = qs.IndexOf(maxQ);
        exploreRate = Mathf.Clamp(exploreRate - exploreDecay, minExploreRate, maxExploreRate);

        //if (Random.Range(0, 100) < exploreRate)
        //    maxQIndex = Random.Range(0, 2);

        if (maxQIndex == 0)
            this.transform.Rotate(Vector3.right, tiltSpeed * (float)qs[maxQIndex]);
        else if (maxQIndex == 1)
            this.transform.Rotate(Vector3.right, -tiltSpeed * (float)qs[maxQIndex]);

        if (ball.GetComponent<BallState>().dropped)
            reward = -1f;
        else
            reward = 0.1f;

        Replay lastMemory = new Replay(
                this.transform.rotation.x,
                ball.transform.position.z,
                ball.GetComponent<Rigidbody>().angularVelocity.x,
                reward
            );

        if (replayMemory.Count > memCap)
            replayMemory.RemoveAt(0);

        replayMemory.Add(lastMemory);

        if (ball.GetComponent<BallState>().dropped) {
            for (int i = replayMemory.Count - 1; i >= 0; i--) {
                List<double> outputsNew = new List<double>();
                List<double> outputsOld = ann.SoftMax(ann.CalcOutput(replayMemory[i].states));

                double maxQOld = outputsOld.Max();
                int action = outputsOld.IndexOf(maxQOld);
                double feedback;

                if (i == replayMemory.Count - 1 || replayMemory[i].reward == -1) {
                    feedback = replayMemory[i].reward;
                } else {
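                    //Bellman update: feedback = r + discount * max Q(s')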
                    outputsNew = ann.SoftMax(ann.CalcOutput(replayMemory[i + 1].states));
                    maxQ = outputsNew.Max();
                    feedback = replayMemory[i].reward + discount * maxQ;
                }

                outputsOld[action] = feedback;
                ann.Train(replayMemory[i].states, outputsOld);
            }
            ResetBall();
            failCount++;
        }

    }
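
Both examples build Replay objects and read back their states and reward members, but the Replay class itself is not shown. Below is a minimal sketch of a container that would satisfy both call sites (six constructor arguments in Example #1, four in Example #2); only the states list, the reward field, and the "states first, reward last" argument order are implied by the code above, the params-based constructor is an assumption:

    using System.Collections.Generic;

    public class Replay
    {
        public List<double> states;
        public double reward;

        //the last argument is the reward; everything before it is state,
        //so the same class works for both the 5-state and the 3-state setup
        public Replay(params double[] values)
        {
            states = new List<double>();
            for (int i = 0; i < values.Length - 1; i++)
            {
                states.Add(values[i]);
            }
            reward = values[values.Length - 1];
        }
    }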