/// <summary>
/// Generate input deltas for a recurring network.
/// </summary>
/// <param name="inputData">Per-timestep input vectors.</param>
/// <param name="targetData">Per-timestep target vectors.</param>
/// <returns>Array of input deltas, one per timestep.</returns>
public float[][] InputErrorPropagationRecurring(float[][] inputData, float[][] targetData)
{
    if (inputData.Length > maxUnrollLength)
    {
        throw new System.ArgumentException("Input/target array cannot be larger than max unroll length!");
    }
    if (!hasRecurring)
    {
        throw new System.ArgumentException("No recurring layers to perform recurring error propagation on.");
    }

    derivatives.Reset();
    for (int i = 0; i < inputData.Length; i++)
    {
        stackedRuntimeContext[i].Reset(true);
        stackedDerivativeMemory[i].Reset();
        Utils.Fill(stackedDerivativeMemory[i].inputMem, 0.0f);
    }

    //run forwards and then run backwards, backpropagating through recurring state
    int dataIndex;
    for (dataIndex = 0; dataIndex < inputData.Length; dataIndex++)
    {
        Array.Copy(inputData[dataIndex], stackedRuntimeContext[dataIndex].inputData, stackedRuntimeContext[dataIndex].inputData.Length);
        neuralNetwork.Execute_FullContext(stackedRuntimeContext[dataIndex], stackedFullContext[dataIndex]);
    }

    //back propagate through stacked contexts, newest to oldest
    while (dataIndex-- > 0)
    {
        neuralNetwork.ExecuteBackwards(targetData[dataIndex], stackedRuntimeContext[dataIndex], stackedFullContext[dataIndex], stackedDerivativeMemory[dataIndex], 0, -1);
    }

    //collect the accumulated input deltas
    float[][] io = new float[inputData.Length][];
    for (int i = 0; i < io.Length; i++)
    {
        io[i] = new float[inputData[i].Length];
        Array.Copy(stackedDerivativeMemory[i].inputMem, io[i], io[i].Length);
    }
    return io;
}
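/// <summary>
/// Usage sketch (illustrative, not part of the original API): get the gradient of the
/// loss with respect to each timestep's inputs for one short sequence. Assumes the
/// trainer was built with recurring layers and the sequence length does not exceed
/// maxUnrollLength; the method name and parameter names here are hypothetical.
/// </summary>
public float[] Example_FirstTimestepDeltas(float[][] sequenceInputs, float[][] sequenceTargets)
{
    float[][] deltas = InputErrorPropagationRecurring(sequenceInputs, sequenceTargets);
    //deltas[t][k] holds dLoss/dInput[k] at timestep t; here we just surface timestep 0,
    //e.g. to chain the gradient back into an upstream network
    return deltas[0];
}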
/// <summary>
/// Run a single iteration of learning: one forward propagation, plus a backward
/// propagation pass whenever an unroll window completes.
/// </summary>
public void Learn()
{
    if (!running)
    {
        return;
    }

    if (resetState)
    {
        resetState = false;
        newLoss = 0.0f;
        lossSampleCount = 0;
        derivatives.Reset();
        for (int i = 0; i < stackedRuntimeContext.Length; i++)
        {
            stackedRuntimeContext[i].Reset(true);
            stackedDerivativeMemory[i].Reset();
        }
        if (hasRecurring && stochasticSkipping)
        {
            if (targetData.Length < maxUnrollLength)
            {
                skipN = 0;
            }
            else
            {
                //skip a random offset within the leftover remainder so unroll windows don't always align
                skipN = Utils.NextInt(0, (targetData.Length % maxUnrollLength) + 1);
            }
        }
    }

    //run forwards for maxUnrollLength and then run backwards for maxUnrollLength, backpropagating through recurring state
    if (skipN > 0)
    {
        //skip a random number of samples at the beginning to apply a 'shuffle'
        if (hasRecurring)
        {
            Array.Copy(inputData[dataIndex], stackedRuntimeContext[0].inputData, stackedRuntimeContext[0].inputData.Length);
            neuralNetwork.Execute(stackedRuntimeContext[0]);
        }
        skipN--;
    }
    else
    {
        int unrollIndex = unrollCount;
        if (!hasRecurring)
        {
            unrollIndex = 0;
        }
        Array.Copy(inputData[dataIndex], stackedRuntimeContext[unrollIndex].inputData, stackedRuntimeContext[unrollIndex].inputData.Length);
        neuralNetwork.Execute_FullContext(stackedRuntimeContext[unrollIndex], stackedFullContext[unrollIndex]);
        unrollCount++;

        if (hasRecurring)
        {
            if (unrollCount >= maxUnrollLength || dataIndex + 1 >= targetData.Length)
            {
                //back propagate through stacked contexts, newest to oldest
                float nextLoss = 0.0f;
                int tdatIndex = dataIndex, nunroll = unrollCount;
                while (unrollCount-- > 0)
                {
                    neuralNetwork.ExecuteBackwards(targetData[tdatIndex], stackedRuntimeContext[unrollCount], stackedFullContext[unrollCount], stackedDerivativeMemory[unrollCount], lossType, (lossType == LOSS_TYPE_CROSSENTROPY ? crossEntropyLossTargets[tdatIndex] : -1));
                    if (lossType == LOSS_TYPE_AVERAGE)
                    {
                        nextLoss += stackedDerivativeMemory[unrollCount].loss;
                    }
                    else if (stackedDerivativeMemory[unrollCount].loss > nextLoss)
                    {
                        nextLoss = stackedDerivativeMemory[unrollCount].loss;
                    }
                    tdatIndex--;
                }
                if (lossType == LOSS_TYPE_AVERAGE)
                {
                    newLoss += nextLoss / (float)nunroll;
                    lossSampleCount++;
                }
                else if (nextLoss > newLoss)
                {
                    newLoss = nextLoss;
                }

                //learn
                adagradMemory.Apply(stackedDerivativeMemory[0]);
                derivatives.Reset();
                unrollCount = 0;

                //copy recurring state over into the first context for the next window
                CopyRecurringState(stackedRuntimeContext[maxUnrollLength - 1], stackedRuntimeContext[0]);
            }
            else
            {
                //copy recurring state into the next context
                CopyRecurringState(stackedRuntimeContext[unrollCount - 1], stackedRuntimeContext[unrollCount]);
            }
        }
        else
        {
            neuralNetwork.ExecuteBackwards(targetData[dataIndex], stackedRuntimeContext[unrollIndex], stackedFullContext[unrollIndex], stackedDerivativeMemory[unrollIndex], lossType, (lossType == LOSS_TYPE_CROSSENTROPY ? crossEntropyLossTargets[dataIndex] : -1));
            if (lossType == LOSS_TYPE_AVERAGE)
            {
                newLoss += stackedDerivativeMemory[unrollIndex].loss;
                lossSampleCount++;
            }
            else if (stackedDerivativeMemory[unrollIndex].loss > newLoss)
            {
                newLoss = stackedDerivativeMemory[unrollIndex].loss;
            }
            if (unrollCount >= maxUnrollLength || dataIndex + 1 >= targetData.Length)
            {
                //learn
                adagradMemory.Apply(stackedDerivativeMemory[0]);
                derivatives.Reset();
                unrollCount = 0;
            }
        }
    }

    //advance index
    dataIndex++;
    if (dataIndex >= targetData.Length)
    {
        iterations++;
        dataIndex = 0;
        if (lossType == LOSS_TYPE_AVERAGE)
        {
            newLoss /= (float)lossSampleCount;
        }
        if (newLoss < bestLoss)
        {
            bestLoss = newLoss;
        }
        if (newLoss <= desiredLoss)
        {
            //hit goal, stop
            if (onReachedGoal != null)
            {
                onReachedGoal();
            }
            running = false;
            return;
        }

        //exponentially smooth the loss and track its delta
        float lsl = smoothLoss;
        smoothLoss = smoothLoss * lossSmoothing + newLoss * (1.0f - lossSmoothing);
        lossDelta = lossDelta * lossSmoothing + (lsl - smoothLoss) * (1.0f - lossSmoothing);
        lossSampleCount = 0;
        newLoss = 0.0f;

        //stream new data
        if (onStreamNextData != null)
        {
            resetState = onStreamNextData(ref inputData, ref targetData);
            if (lossType == LOSS_TYPE_CROSSENTROPY)
            {
                //rebuild cross entropy targets: index of the largest target value, or -1 when the row is all zeros
                crossEntropyLossTargets = new int[targetData.Length];
                for (int i = 0; i < targetData.Length; i++)
                {
                    int r = Utils.Largest(targetData[i], 0, targetData[i].Length);
                    crossEntropyLossTargets[i] = (targetData[i][r] > 0.0f) ? r : -1;
                }
            }
        }
        else
        {
            resetState = true;
        }
        if (shuffleChance > 0.0f && Utils.NextFloat01() < shuffleChance)
        {
            Utils.Shuffle(inputData, targetData);
        }
    }
}
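/// <summary>
/// Usage sketch (illustrative, not part of the original API): drive Learn() one
/// iteration at a time until the trainer reaches desiredLoss or a step budget runs
/// out. Assumes `running`, the data arrays, and any callbacks were set up beforehand;
/// the method name and the `maxSteps` parameter are hypothetical.
/// </summary>
public void Example_TrainUntilDone(int maxSteps)
{
    //Learn() stops itself (running = false) once newLoss <= desiredLoss,
    //so the budget only guards against non-converging runs
    for (int step = 0; step < maxSteps && running; step++)
    {
        Learn();
    }
}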
/// <summary>
/// Learn from rewarded sessions with the specified 'learningRate', 'iter' times.
/// </summary>
/// <param name="learningRate">Learning rate.</param>
/// <param name="iter">Number of passes over the recorded sessions.</param>
public void Learn(float learningRate, int iter)
{
    //clear current session from stream and end session stream
    ClearSession();
    learningSessionStream.Close();

    //begin reading session stream
    learningSessionStream = File.OpenRead(learningSessionsFile);
    //tb aliases context 0's output buffer and is used as the one-hot action target
    float[] tb = stackedRuntimeContext[0].outputData;
    float[][] hm = stackedRuntimeContext[0].hiddenRecurringData;
    QLearningContext[] qctx = new QLearningContext[maxUnrollLength];
    for (int i = 0; i < maxUnrollLength; i++)
    {
        qctx[i] = new QLearningContext(0, new float[neuralNetwork.inputLayer.numberOfNeurons]);
    }

    for (int j = 0; j < iter; j++)
    {
        learningSessionStream.Position = 0;

        //training
        for (int s = 0; s < sessions.Count; s++)
        {
            //reset derivatives/context memory
            derivatives.Reset();
            for (int i = 0; i < maxUnrollLength; i++)
            {
                stackedRuntimeContext[i].Reset(true);
                stackedDerivativeMemory[i].Reset();
            }

            //initial memory state
            for (int i = 0; i < hm.Length; i++)
            {
                if (hm[i] != null)
                {
                    Utils.FloatArrayFromStream(hm[i], learningSessionStream);
                }
            }

            int alen = sessions[s], unrollCount = 0;

            //seek ahead to load the session reward, then seek back
            long lpos = learningSessionStream.Position;
            learningSessionStream.Seek(lpos + alen * (4 + neuralNetwork.inputLayer.numberOfNeurons * 4), SeekOrigin.Begin);
            float rewardAmount = Utils.FloatFromStream(learningSessionStream) * learningRate;
            learningSessionStream.Seek(lpos, SeekOrigin.Begin);

            for (int i = 0; i < alen; i++)
            {
                qctx[unrollCount].action = Utils.IntFromStream(learningSessionStream);
                Utils.FloatArrayFromStream(qctx[unrollCount].input, learningSessionStream);
                Array.Copy(qctx[unrollCount].input, stackedRuntimeContext[unrollCount].inputData, qctx[unrollCount].input.Length);
                neuralNetwork.Execute_FullContext(stackedRuntimeContext[unrollCount], stackedFullContext[unrollCount]);
                if (onReplayAction != null)
                {
                    onReplayAction(qctx[unrollCount].action);
                }
                unrollCount++;

                if (unrollCount >= maxUnrollLength || i + 1 >= alen)
                {
                    //back propagate through stacked contexts, newest to oldest,
                    //using the recorded action as a one-hot target
                    while (unrollCount-- > 0)
                    {
                        tb[qctx[unrollCount].action] = 1.0f;
                        neuralNetwork.ExecuteBackwards(tb, stackedRuntimeContext[unrollCount], stackedFullContext[unrollCount], stackedDerivativeMemory[unrollCount], NeuralNetworkTrainer.LOSS_TYPE_AVERAGE, -1);
                        tb[qctx[unrollCount].action] = 0.0f;
                    }

                    //learn, scaling the step by the session reward
                    NeuralNetworkAdaGradMemory.ApplyNoMemory(stackedDerivativeMemory[0], stackedDerivativeMemory[0].weights, stackedDerivativeMemory[0].biases, stackedDerivativeMemory[0].recurrWeights, rewardAmount);
                    derivatives.Reset();
                    unrollCount = 0;

                    if (i + 1 < alen)
                    {
                        //copy recurring state over into the first context for the next window
                        CopyRecurringState(stackedRuntimeContext[maxUnrollLength - 1], stackedRuntimeContext[0]);
                    }
                }
                else
                {
                    //copy recurring state into the next context
                    CopyRecurringState(stackedRuntimeContext[unrollCount - 1], stackedRuntimeContext[unrollCount]);
                }
            }
        }
    }
}
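/// <summary>
/// Usage sketch (illustrative, not part of the original API): replay recorded,
/// rewarded sessions for a few epochs. Assumes sessions were previously recorded to
/// learningSessionsFile; the method name and the rate/epoch constants are hypothetical.
/// </summary>
public void Example_ReplayRewardedSessions()
{
    //each of the 10 passes re-reads every recorded session from the stream and
    //scales the memoryless AdaGrad update by that session's reward * learningRate
    Learn(0.01f, 10);
}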