Пример #1
0
        /// <summary>
        /// Generate input deltas for a recurring network.
        /// Every step of the sequence is executed forwards into its own stacked
        /// context, then the steps are back-propagated in reverse order and the
        /// input deltas accumulated for each step are returned.
        /// </summary>
        /// <param name="inputData">Input sequence, one array per step. May not contain more than maxUnrollLength steps.</param>
        /// <param name="targetData">Target sequence. Must contain at least one entry per input step.</param>
        /// <returns>Array of input deltas, one array per input step, each as long as that step's input array.</returns>
        /// <exception cref="System.ArgumentNullException">inputData or targetData is null.</exception>
        /// <exception cref="System.ArgumentException">
        /// The sequence is longer than maxUnrollLength, the network has no recurring
        /// layers, or targetData has fewer entries than inputData.
        /// </exception>
        public float[][] InputErrorPropagationRecurring(float[][] inputData, float[][] targetData)
        {
            if (inputData == null)
            {
                throw new System.ArgumentNullException("inputData");
            }
            if (targetData == null)
            {
                throw new System.ArgumentNullException("targetData");
            }
            if (inputData.Length > maxUnrollLength)
            {
                throw new System.ArgumentException("Input/target array cannot be larger then max unroll length!");
            }
            if (!hasRecurring)
            {
                throw new System.ArgumentException("No recurring layers to perform recurring error propagation on.");
            }
            //validate before any state is touched: the backward pass indexes targetData once per input step
            if (targetData.Length < inputData.Length)
            {
                throw new System.ArgumentException("Target array must contain an entry for every input step.", "targetData");
            }

            int stepCount = inputData.Length;

            //clear everything left over from a previous run for the steps about to be used
            derivatives.Reset();
            for (int step = 0; step < stepCount; step++)
            {
                stackedRuntimeContext[step].Reset(true);
                stackedDerivativeMemory[step].Reset();
                Utils.Fill(stackedDerivativeMemory[step].inputMem, 0.0f);
            }

            //run forwards, one stacked context per step
            //NOTE(review): the contexts are reset above and no recurring state is copied from
            //step to step inside this method - confirm that this is intended.
            for (int step = 0; step < stepCount; step++)
            {
                float[] contextInput = stackedRuntimeContext[step].inputData;
                Array.Copy(inputData[step], contextInput, contextInput.Length);
                neuralNetwork.Execute_FullContext(stackedRuntimeContext[step], stackedFullContext[step]);
            }

            //back propagate through the stacked contexts, last step first
            //NOTE(review): the trailing arguments (0, -1) are presumably loss type and
            //cross entropy target index - confirm against ExecuteBackwards.
            for (int step = stepCount - 1; step >= 0; step--)
            {
                neuralNetwork.ExecuteBackwards(targetData[step], stackedRuntimeContext[step], stackedFullContext[step], stackedDerivativeMemory[step], 0, -1);
            }

            //hand back a copy of the input deltas so callers never see the internal buffers
            float[][] inputDeltas = new float[stepCount][];
            for (int step = 0; step < stepCount; step++)
            {
                inputDeltas[step] = new float[inputData[step].Length];
                Array.Copy(stackedDerivativeMemory[step].inputMem, inputDeltas[step], inputDeltas[step].Length);
            }
            return inputDeltas;
        }
Пример #2
0
        /// <summary>
        /// Run single iteration of learning, either 1 forward or backward propagation.
        /// Each call consumes one sample at dataIndex. Recurring networks are unrolled
        /// for up to maxUnrollLength samples before back-propagating through the stacked
        /// contexts; non-recurring networks back-propagate every sample. Derivatives are
        /// applied through adagradMemory once maxUnrollLength samples have been collected
        /// or the end of the data is reached. When the last sample has been consumed the
        /// loss statistics are updated, the goal is checked and new data is streamed and/or
        /// shuffled.
        /// </summary>
        public void Learn()
        {
            if (!running)
            {
                return;
            }

            if (resetState)
            {
                resetState      = false;
                newLoss         = 0.0f;
                lossSampleCount = 0;

                derivatives.Reset();
                for (int i = 0; i < stackedRuntimeContext.Length; i++)
                {
                    stackedRuntimeContext[i].Reset(true);
                    stackedDerivativeMemory[i].Reset();
                }

                if (hasRecurring && stochasticSkipping)
                {
                    if (targetData.Length < maxUnrollLength)
                    {
                        skipN = 0;
                    }
                    else
                    {
                        //the skip range is derived from the sequence length (the same quantity the
                        //condition above tests), not from the width of a single target vector
                        skipN = (int)(Utils.NextInt(0, (targetData.Length % maxUnrollLength) + 1));
                    }
                }
            }

            //run forwards for maxUnrollLength and then run backwards for maxUnrollLength backpropagating through recurring
            if (skipN > 0)
            {
                //skip random # at beginning to apply a 'shuffle'
                if (hasRecurring)
                {
                    //still execute the sample so that context 0 sees the skipped input
                    Array.Copy(inputData[dataIndex], stackedRuntimeContext[0].inputData, stackedRuntimeContext[0].inputData.Length);
                    neuralNetwork.Execute(stackedRuntimeContext[0]);
                }
                skipN--;
            }
            else
            {
                //non-recurring networks always reuse slot 0, recurring ones use one slot per unrolled step
                int unrollIndex = hasRecurring ? unrollCount : 0;

                Array.Copy(inputData[dataIndex], stackedRuntimeContext[unrollIndex].inputData, stackedRuntimeContext[unrollIndex].inputData.Length);
                neuralNetwork.Execute_FullContext(stackedRuntimeContext[unrollIndex], stackedFullContext[unrollIndex]);

                unrollCount++;
                bool windowComplete = unrollCount >= maxUnrollLength || dataIndex + 1 >= targetData.Length;

                if (hasRecurring)
                {
                    if (windowComplete)
                    {
                        //back propagate through stacked
                        float nextLoss  = 0.0f;
                        int   tdatIndex = dataIndex,
                              nunroll   = unrollCount;
                        while (unrollCount-- > 0)
                        {
                            neuralNetwork.ExecuteBackwards(targetData[tdatIndex], stackedRuntimeContext[unrollCount], stackedFullContext[unrollCount], stackedDerivativeMemory[unrollCount], lossType, (lossType == LOSS_TYPE_CROSSENTROPY ? crossEntropyLossTargets[tdatIndex] : -1));

                            float stepLoss = stackedDerivativeMemory[unrollCount].loss;
                            if (lossType == LOSS_TYPE_AVERAGE)
                            {
                                nextLoss += stepLoss;
                            }
                            else if (stepLoss > nextLoss)
                            {
                                //every other loss type tracks the largest loss seen
                                nextLoss = stepLoss;
                            }

                            tdatIndex--;
                        }

                        if (lossType == LOSS_TYPE_AVERAGE)
                        {
                            //one loss sample per window: the mean over the steps in it
                            newLoss += nextLoss / (float)nunroll;
                            lossSampleCount++;
                        }
                        else if (nextLoss > newLoss)
                        {
                            newLoss = nextLoss;
                        }

                        //learn
                        adagradMemory.Apply(stackedDerivativeMemory[0]);
                        derivatives.Reset();

                        unrollCount = 0;

                        //copy recurring state over from the last context that was actually executed;
                        //a window cut short by the end of the data never ran slot maxUnrollLength - 1
                        CopyRecurringState(stackedRuntimeContext[nunroll - 1], stackedRuntimeContext[0]);
                    }
                    else
                    {
                        //copy recurring state into next
                        CopyRecurringState(stackedRuntimeContext[unrollCount - 1], stackedRuntimeContext[unrollCount]);
                    }
                }
                else
                {
                    neuralNetwork.ExecuteBackwards(targetData[dataIndex], stackedRuntimeContext[unrollIndex], stackedFullContext[unrollIndex], stackedDerivativeMemory[unrollIndex], lossType, (lossType == LOSS_TYPE_CROSSENTROPY ? crossEntropyLossTargets[dataIndex] : -1));

                    float sampleLoss = stackedDerivativeMemory[unrollIndex].loss;
                    if (lossType == LOSS_TYPE_AVERAGE)
                    {
                        newLoss += sampleLoss;
                        lossSampleCount++;
                    }
                    else if (sampleLoss > newLoss)
                    {
                        newLoss = sampleLoss;
                    }

                    if (windowComplete)
                    {
                        //learn
                        adagradMemory.Apply(stackedDerivativeMemory[0]);
                        derivatives.Reset();

                        unrollCount = 0;
                    }
                }
            }

            //advance index
            dataIndex++;
            if (dataIndex < targetData.Length)
            {
                return;
            }

            //a full pass over the data has been completed
            iterations++;
            dataIndex = 0;

            if (lossType == LOSS_TYPE_AVERAGE)
            {
                newLoss /= (float)lossSampleCount;
            }

            if (newLoss < bestLoss)
            {
                bestLoss = newLoss;
            }
            if (newLoss <= desiredLoss)
            {
                //hit goal, stop
                if (onReachedGoal != null)
                {
                    onReachedGoal();
                }
                running = false;
                return;
            }

            //exponentially smoothed loss and loss delta
            float lsl = smoothLoss;
            smoothLoss = smoothLoss * lossSmoothing + newLoss * (1.0f - lossSmoothing);
            lossDelta  = lossDelta * lossSmoothing + (lsl - smoothLoss) * (1.0f - lossSmoothing);

            lossSampleCount = 0;
            newLoss         = 0.0f;

            //stream new data
            bool targetsChanged = false;
            if (onStreamNextData != null)
            {
                resetState     = onStreamNextData(ref inputData, ref targetData);
                targetsChanged = true;
            }
            else
            {
                resetState = true;
            }

            if (shuffleChance > 0.0f && Utils.NextFloat01() < shuffleChance)
            {
                Utils.Shuffle(inputData, targetData);
                targetsChanged = true;
            }

            //rebuild the cross entropy target indices whenever targetData was replaced or reordered,
            //otherwise crossEntropyLossTargets[i] would no longer describe targetData[i]
            if (targetsChanged && lossType == LOSS_TYPE_CROSSENTROPY)
            {
                crossEntropyLossTargets = new int[targetData.Length];
                for (int i = 0; i < targetData.Length; i++)
                {
                    int r = Utils.Largest(targetData[i], 0, targetData[i].Length);
                    //an entry whose selected value is not positive has no target class
                    crossEntropyLossTargets[i] = targetData[i][r] > 0.0f ? r : -1;
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Learn from rewarded sessions with specified 'learningRate', 'iter' times.
        /// The recorded sessions are replayed from the learning sessions file: every
        /// recorded step is executed forwards, back-propagated through the stacked
        /// contexts in windows of up to maxUnrollLength steps, and the resulting
        /// derivatives are applied scaled by the session reward times the learning rate.
        /// </summary>
        /// <param name="learningRate">Learning rate.</param>
        /// <param name="iter">Number of times the complete set of recorded sessions is replayed.</param>
        public void Learn(float learningRate, int iter)
        {
            //clear current session from stream and end session stream
            ClearSession();
            learningSessionStream.Close();

            //begin reading session stream
            //NOTE(review): the read stream stays assigned to learningSessionStream when this
            //method returns - confirm that whoever records the next session reopens it.
            learningSessionStream = File.OpenRead(learningSessionsFile);

            //NOTE(review): the target buffer aliases the output buffer of context 0, so the
            //back propagation below writes 1.0f/0.0f straight into that output - confirm intent.
            float[]            tb   = stackedRuntimeContext[0].outputData;
            float[][]          hm   = stackedRuntimeContext[0].hiddenRecurringData;
            QLearningContext[] qctx = new QLearningContext[maxUnrollLength];
            for (int i = 0; i < maxUnrollLength; i++)
            {
                qctx[i] = new QLearningContext(0, new float[neuralNetwork.inputLayer.numberOfNeurons]);
            }

            for (int j = 0; j < iter; j++)
            {
                learningSessionStream.Position = 0;

                //training
                for (int s = 0; s < sessions.Count; s++)
                {
                    //reset derivatives/context memory
                    derivatives.Reset();
                    for (int i = 0; i < maxUnrollLength; i++)
                    {
                        stackedRuntimeContext[i].Reset(true);
                        stackedDerivativeMemory[i].Reset();
                    }

                    //initial memory state
                    for (int i = 0; i < hm.Length; i++)
                    {
                        if (hm[i] != null)
                        {
                            Utils.FloatArrayFromStream(hm[i], learningSessionStream);
                        }
                    }

                    int alen        = sessions[s],
                        unrollCount = 0;

                    //one record is a 4 byte action followed by one 4 byte float per input neuron;
                    //computed as long so that long sessions cannot overflow the seek offset
                    long recordBytes = 4L + neuralNetwork.inputLayer.numberOfNeurons * 4L;

                    //seek ahead to load reward then back
                    long lpos = learningSessionStream.Position;
                    learningSessionStream.Seek(lpos + alen * recordBytes, SeekOrigin.Begin);
                    float rewardAmount = Utils.FloatFromStream(learningSessionStream) * learningRate;
                    //remember where the data after the reward begins so the next session starts there
                    long nextSessionPos = learningSessionStream.Position;
                    learningSessionStream.Seek(lpos, SeekOrigin.Begin);

                    for (int i = 0; i < alen; i++)
                    {
                        QLearningContext step = qctx[unrollCount];
                        step.action = Utils.IntFromStream(learningSessionStream);
                        Utils.FloatArrayFromStream(step.input, learningSessionStream);

                        Array.Copy(step.input, stackedRuntimeContext[unrollCount].inputData, step.input.Length);
                        neuralNetwork.Execute_FullContext(stackedRuntimeContext[unrollCount], stackedFullContext[unrollCount]);
                        if (onReplayAction != null)
                        {
                            onReplayAction(step.action);
                        }

                        unrollCount++;
                        bool lastStep = i + 1 >= alen;
                        if (unrollCount >= maxUnrollLength || lastStep)
                        {
                            //back propagate through stacked
                            while (unrollCount-- > 0)
                            {
                                //mark the recorded action as the target for this step, then clear it again
                                tb[qctx[unrollCount].action] = 1.0f;
                                neuralNetwork.ExecuteBackwards(tb, stackedRuntimeContext[unrollCount], stackedFullContext[unrollCount], stackedDerivativeMemory[unrollCount], NeuralNetworkTrainer.LOSS_TYPE_AVERAGE, -1);
                                tb[qctx[unrollCount].action] = 0.0f;
                            }

                            //learn
                            NeuralNetworkAdaGradMemory.ApplyNoMemory(stackedDerivativeMemory[0], stackedDerivativeMemory[0].weights, stackedDerivativeMemory[0].biases, stackedDerivativeMemory[0].recurrWeights, rewardAmount);
                            derivatives.Reset();
                            unrollCount = 0;

                            //after the last step there is no further window to seed with recurring state
                            if (!lastStep)
                            {
                                //copy recurring state over
                                CopyRecurringState(stackedRuntimeContext[maxUnrollLength - 1], stackedRuntimeContext[0]);
                            }
                        }
                        else
                        {
                            //copy recurring state into next
                            CopyRecurringState(stackedRuntimeContext[unrollCount - 1], stackedRuntimeContext[unrollCount]);
                        }
                    }

                    //the replay stops on this session's reward value; step past it so the next
                    //session does not read the reward bytes as its initial memory state
                    learningSessionStream.Seek(nextSessionPos, SeekOrigin.Begin);
                }
            }
        }