/// <summary>
/// Create a new NeuralNetworkTrainer.
/// </summary>
/// <param name="nn">NeuralNetwork to train.</param>
/// <param name="inputDat">Input data.</param>
/// <param name="targetDat">Target data.</param>
/// <param name="maxUnrollLen">Number of time steps to unroll memory state for recurring layers.</param>
/// <param name="losType">Loss calculation type: NeuralNetworkTrainer.LOSS_TYPE_AVERAGE, LOSS_TYPE_MAX, or LOSS_TYPE_CROSSENTROPY.</param>
public NeuralNetworkTrainer(NeuralNetwork nn, float[][] inputDat, float[][] targetDat, int maxUnrollLen, int losType)
{
    neuralNetwork = nn;
    inputData = inputDat;
    targetData = targetDat;
    maxUnrollLength = maxUnrollLen;
    if (maxUnrollLength < 1)
    {
        maxUnrollLength = 1;
    }
    lossType = losType;

    //check for a recurring layer to decide whether contexts must be stacked and unrolled
    if (nn.outputLayer.recurring)
    {
        hasRecurring = true;
    }
    else
    {
        for (int i = 0; i < nn.hiddenLayers.Length; i++)
        {
            if (nn.hiddenLayers[i].recurring)
            {
                hasRecurring = true;
                break;
            }
        }
    }

    derivatives.Setup(nn);
    adagradMemory.Setup(nn);
    adagradMemory.Reset();

    //non-recurrent networks need only a single context
    int tunrollLen = maxUnrollLength;
    if (!hasRecurring)
    {
        tunrollLen = 1;
    }

    stackedRuntimeContext = new NeuralNetworkContext[tunrollLen];
    stackedFullContext = new NeuralNetworkFullContext[tunrollLen];
    stackedDerivativeMemory = new NeuralNetworkPropagationState[tunrollLen];
    for (int i = 0; i < stackedRuntimeContext.Length; i++)
    {
        stackedRuntimeContext[i] = new NeuralNetworkContext();
        stackedRuntimeContext[i].Setup(nn);
        stackedFullContext[i] = new NeuralNetworkFullContext();
        stackedFullContext[i].Setup(nn);
        stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
        stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
    }
}
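//Usage sketch: a hypothetical example of constructing the trainer; the network
//and data names below are assumptions, not part of the original API.
private static NeuralNetworkTrainer CreateTrainerExample(NeuralNetwork network, float[][] inputs, float[][] targets)
{
    //unroll up to 8 time steps for recurring layers, train with mean absolute error
    return new NeuralNetworkTrainer(network, inputs, targets, 8, NeuralNetworkTrainer.LOSS_TYPE_AVERAGE);
}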
/// <summary>
/// Create a new NeuralNetworkQLearning system.
/// </summary>
/// <param name="nn">NeuralNetwork to train.</param>
/// <param name="maxUnrollLen">Number of time steps to unroll memory state for recurring layers.</param>
/// <param name="sessionsFileName">File used to persist learning sessions.</param>
public NeuralNetworkQLearning(NeuralNetwork nn, int maxUnrollLen, string sessionsFileName)
{
    neuralNetwork = nn;
    learningSessionsFile = sessionsFileName;
    maxUnrollLength = maxUnrollLen;
    if (maxUnrollLength < 1)
    {
        maxUnrollLength = 1;
    }

    //check for a recurring layer to decide whether contexts must be stacked and unrolled
    for (int i = 0; i < nn.hiddenLayers.Length; i++)
    {
        if (nn.hiddenLayers[i].recurring)
        {
            hasRecurring = true;
            break;
        }
    }

    derivatives.Setup(nn);

    //non-recurrent networks need only a single context
    if (!hasRecurring)
    {
        maxUnrollLength = 1;
    }

    stackedRuntimeContext = new NeuralNetworkContext[maxUnrollLength];
    stackedFullContext = new NeuralNetworkFullContext[maxUnrollLength];
    stackedDerivativeMemory = new NeuralNetworkPropagationState[maxUnrollLength];
    for (int i = 0; i < stackedRuntimeContext.Length; i++)
    {
        stackedRuntimeContext[i] = new NeuralNetworkContext();
        stackedRuntimeContext[i].Setup(nn);
        stackedFullContext[i] = new NeuralNetworkFullContext();
        stackedFullContext[i].Setup(nn);
        stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
        stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
    }
}
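//Usage sketch: a hypothetical example of constructing the Q-learning system;
//the network name and sessions file path are assumptions, not part of the original API.
private static NeuralNetworkQLearning CreateQLearningExample(NeuralNetwork network)
{
    //unroll up to 8 time steps for recurring layers, persist sessions to a local file
    return new NeuralNetworkQLearning(network, 8, "qlearning_sessions.dat");
}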
/// <summary>
/// Create a new NeuralNetworkGenerator.
/// </summary>
/// <param name="nn">NeuralNetwork to train.</param>
/// <param name="maxUnrollLen">Number of time steps to unroll memory state for recurring layers.</param>
public NeuralNetworkGenerator(NeuralNetwork nn, int maxUnrollLen)
{
    neuralNetwork = nn;
    maxUnrollLength = maxUnrollLen;
    if (maxUnrollLength < 1)
    {
        maxUnrollLength = 1;
    }

    //check for a recurring layer to decide whether contexts must be stacked and unrolled
    for (int i = 0; i < nn.hiddenLayers.Length; i++)
    {
        if (nn.hiddenLayers[i].recurring)
        {
            hasRecurring = true;
            break;
        }
    }

    derivatives.Setup(nn);

    if (hasRecurring)
    {
        //allocate a back propagation buffer for every recurring hidden layer
        recurringBPBuffer = new float[nn.hiddenLayers.Length][];
        for (int i = 0; i < recurringBPBuffer.Length; i++)
        {
            if (nn.hiddenLayers[i].recurring)
            {
                recurringBPBuffer[i] = new float[nn.hiddenLayers[i].numberOfNeurons];
            }
        }
    }

    //non-recurrent networks need only a single context
    int tunrollLen = maxUnrollLength;
    if (!hasRecurring)
    {
        tunrollLen = 1;
    }

    stackedRuntimeContext = new NeuralNetworkContext[tunrollLen];
    stackedFullContext = new NeuralNetworkFullContext[tunrollLen];
    stackedDerivativeMemory = new NeuralNetworkPropagationState[tunrollLen];
    for (int i = 0; i < tunrollLen; i++)
    {
        stackedRuntimeContext[i] = new NeuralNetworkContext();
        stackedRuntimeContext[i].Setup(nn);
        stackedFullContext[i] = new NeuralNetworkFullContext();
        stackedFullContext[i].Setup(nn);
        stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
        stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
        stackedDerivativeMemory[i].inputMem = new float[nn.inputLayer.numberOfNeurons];
    }
}
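//Usage sketch: a hypothetical example of constructing the generator; the
//network name is an assumption, not part of the original API.
private static NeuralNetworkGenerator CreateGeneratorExample(NeuralNetwork network)
{
    //unroll up to 8 time steps for recurring layers
    return new NeuralNetworkGenerator(network, 8);
}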
private void backpropagate(int level, int index, float deriv, NeuralNetworkPropagationState propState)
{
    if (level < 0)
    {
        return;
    }

    int i, weightIndex;
    float[] b, m, w;

    //recurring weights
    if (level < propState.recurrWeightMems.Length && propState.recurrWeightMems[level] != null)
    {
        b = propState.recurrBuf[level];
        m = propState.recurrWeightMems[level];
        w = propState.recurrWeights[level];
        i = b.Length;
        weightIndex = w.Length - (index + 1) * i;
        float nhderiv = 0.0f;
        while (i-- > 0)
        {
            m[weightIndex] += deriv * b[i];    //accumulate recurring weight gradient
            nhderiv += deriv * w[weightIndex]; //derivative flowing back through time
            weightIndex++;
        }
        //guard against NaN/Infinity propagating through the recurring buffer
        if (float.IsNaN(nhderiv) || float.IsInfinity(nhderiv))
        {
            nhderiv = 0.0f;
        }
        propState.derivativeMemory.altRecurringBPBuffer[level][index] = nhderiv;
    }

    //biases and weights
    b = propState.buf[level];
    m = propState.weightMems[level];
    w = propState.weights[level];
    float[] bpb = null;
    if (level != 0)
    {
        bpb = propState.derivativeMemory.recurringBPBuffer[level - 1];
    }
    propState.biasMems[level][index] += deriv;
    i = b.Length;
    weightIndex = w.Length - (index + 1) * i;
    while (i-- > 0)
    {
        float nderiv = b[i];
        m[weightIndex] += deriv * nderiv; //accumulate weight gradient
        if (level != 0)
        {
            //scale by the activation derivative and push down to the layer below
            nderiv *= nderiv;
            float bpropderiv = 0.0f;
            if (bpb != null)
            {
                bpropderiv = bpb[i];
            }
            propState.state[level - 1][i] += (1.0f - nderiv) * (deriv * w[weightIndex] + bpropderiv);
        }
        else if (propState.inputMem != null)
        {
            //bottom layer: propagate the derivative to the input memory (used for generation)
            nderiv *= nderiv;
            float bpropderiv = 0.0f;
            if (bpb != null)
            {
                bpropderiv = bpb[i];
            }
            propState.inputMem[i] += (1.0f - nderiv) * (deriv * w[weightIndex] + bpropderiv);
        }
        weightIndex++;
    }
}
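//The (1.0f - nderiv) factor above is the tanh chain rule: for an activation
//a = tanh(z), da/dz = 1 - tanh(z)^2 = 1 - a*a, so the derivative arriving at a
//neuron is scaled by (1 - a*a) before flowing to the layer below. A minimal
//sketch of that step in isolation (hypothetical helper, not used by the original code):
private static float ThroughTanh(float activation, float incomingDeriv)
{
    //scale an incoming derivative by the tanh derivative at this activation
    return (1.0f - activation * activation) * incomingDeriv;
}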
/// <summary>
/// Run the neural network backwards, calculating derivatives for use by adagrad or generation.
/// </summary>
/// <param name="target">Target output values.</param>
/// <param name="context">Runtime context holding the forward-pass output.</param>
/// <param name="fullContext">Full activation context from the forward pass.</param>
/// <param name="propState">Propagation state that receives the calculated derivatives.</param>
/// <param name="lossType">Loss calculation type: NeuralNetworkTrainer.LOSS_TYPE_AVERAGE, LOSS_TYPE_MAX, or LOSS_TYPE_CROSSENTROPY.</param>
/// <param name="crossEntropyTarget">Index of the target class for cross-entropy loss, or -1 if unused.</param>
public void ExecuteBackwards(float[] target, NeuralNetworkContext context, NeuralNetworkFullContext fullContext, NeuralNetworkPropagationState propState, int lossType, int crossEntropyTarget)
{
    //prepare for back propagation
    for (int i = 0; i < propState.state.Length; i++)
    {
        Utils.Fill(propState.state[i], 0.0f);
    }

    //back propagation from the output layer + loss calculation
    int lid = hiddenLayers.Length;
    float lossAvg = 0.0f;
    for (int i = 0; i < target.Length; i++)
    {
        float deriv = context.outputData[i] - target[i];
        if (lossType == NeuralNetworkTrainer.LOSS_TYPE_MAX)
        {
            float aderiv = Math.Abs(deriv);
            if (aderiv > lossAvg)
            {
                lossAvg = aderiv;
            }
        }
        else if (lossType == NeuralNetworkTrainer.LOSS_TYPE_AVERAGE)
        {
            lossAvg += Math.Abs(deriv);
        }
        backpropagate(lid, i, deriv, propState);
    }
    if (lossType == NeuralNetworkTrainer.LOSS_TYPE_AVERAGE)
    {
        lossAvg /= (float)target.Length;
    }
    else if (lossType == NeuralNetworkTrainer.LOSS_TYPE_CROSSENTROPY && crossEntropyTarget != -1)
    {
        lossAvg = (float)-Math.Log(context.outputData[crossEntropyTarget]);
        if (float.IsInfinity(lossAvg))
        {
            lossAvg = 1e8f; //clamp -log(0)
        }
    }
    propState.loss = lossAvg;

    propState.derivativeMemory.SwapBPBuffers();

    //back propagate through the hidden layers, deepest first
    int k = lid;
    while (k-- > 0)
    {
        int l = hiddenLayers[k].numberOfNeurons;
        while (l-- > 0)
        {
            backpropagate(k, l, propState.state[k][l], propState);
        }
    }
}
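//Sketch of the three loss reductions computed above, isolated for clarity.
//Hypothetical helper, not part of the original API; it mirrors the logic in
//ExecuteBackwards assuming output holds the forward-pass result.
private static float LossSketch(float[] output, float[] target, int lossType, int crossEntropyTarget)
{
    float loss = 0.0f;
    for (int i = 0; i < target.Length; i++)
    {
        float diff = Math.Abs(output[i] - target[i]);
        if (lossType == NeuralNetworkTrainer.LOSS_TYPE_MAX)
        {
            if (diff > loss) { loss = diff; } //worst-case error over the outputs
        }
        else if (lossType == NeuralNetworkTrainer.LOSS_TYPE_AVERAGE)
        {
            loss += diff;                     //accumulate for the mean
        }
    }
    if (lossType == NeuralNetworkTrainer.LOSS_TYPE_AVERAGE)
    {
        loss /= (float)target.Length;
    }
    else if (lossType == NeuralNetworkTrainer.LOSS_TYPE_CROSSENTROPY && crossEntropyTarget != -1)
    {
        loss = (float)-Math.Log(output[crossEntropyTarget]); //negative log-likelihood of the target class
        if (float.IsInfinity(loss)) { loss = 1e8f; }         //guard log(0)
    }
    return loss;
}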
/// <summary>
/// Apply derivatives directly with a fixed learning rate (no per-parameter adagrad memory).
/// </summary>
/// <param name="derivMem">Propagation state holding the accumulated derivatives.</param>
/// <param name="weight">Weights to update.</param>
/// <param name="bias">Biases to update.</param>
/// <param name="recurrWeight">Recurring weights to update.</param>
/// <param name="learningRate">Fixed learning rate.</param>
public static void ApplyNoMemory(NeuralNetworkPropagationState derivMem, float[][] weight, float[][] bias, float[][] recurrWeight, float learningRate)
{
    for (int i = 0; i < weight.Length; i++)
    {
        //weights
        float[] f = derivMem.weightMems[i], w = weight[i];
        int k = f.Length;
        while (k-- > 0)
        {
            float d = f[k];
            //clamp to avoid exploding gradients
            if (d < -EXPLODING_GRADIENT_CLAMP)
            {
                d = -EXPLODING_GRADIENT_CLAMP;
            }
            else if (d > EXPLODING_GRADIENT_CLAMP)
            {
                d = EXPLODING_GRADIENT_CLAMP;
            }
            w[k] -= learningRate * d;
        }

        //biases
        f = derivMem.biasMems[i];
        w = bias[i];
        k = f.Length;
        while (k-- > 0)
        {
            float d = f[k];
            if (d < -EXPLODING_GRADIENT_CLAMP)
            {
                d = -EXPLODING_GRADIENT_CLAMP;
            }
            else if (d > EXPLODING_GRADIENT_CLAMP)
            {
                d = EXPLODING_GRADIENT_CLAMP;
            }
            w[k] -= learningRate * d;
        }

        //recurring weights, if this layer has them
        if (recurrWeight[i] != null)
        {
            f = derivMem.recurrWeightMems[i];
            w = recurrWeight[i];
            k = f.Length;
            while (k-- > 0)
            {
                float d = f[k];
                if (d < -EXPLODING_GRADIENT_CLAMP)
                {
                    d = -EXPLODING_GRADIENT_CLAMP;
                }
                else if (d > EXPLODING_GRADIENT_CLAMP)
                {
                    d = EXPLODING_GRADIENT_CLAMP;
                }
                w[k] -= learningRate * d;
            }
        }
    }
}
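//Usage sketch: a hypothetical example of a plain gradient step after a backward
//pass; propState is assumed to hold freshly accumulated derivatives.
private static void SgdStepExample(NeuralNetworkPropagationState propState)
{
    //update the same weights/biases the propagation state references, with a fixed learning rate of 0.01
    ApplyNoMemory(propState, propState.weights, propState.biases, propState.recurrWeights, 0.01f);
}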
/// <summary>
/// Accumulate squared derivatives into the adagrad memory and apply the per-parameter update to the network weights/biases.
/// </summary>
/// <param name="derivMem">Propagation state holding the accumulated derivatives.</param>
/// <param name="weight">Weights to update.</param>
/// <param name="bias">Biases to update.</param>
/// <param name="recurrWeight">Recurring weights to update.</param>
public void Apply(NeuralNetworkPropagationState derivMem, float[][] weight, float[][] bias, float[][] recurrWeight)
{
    for (int i = 0; i < weights.Length; i++)
    {
        //weights
        float[] t = weights[i], f = derivMem.weightMems[i], w = weight[i];
        int k = f.Length;
        while (k-- > 0)
        {
            float m = t[k], d = f[k];
            //clamp to avoid exploding gradients
            if (d < -EXPLODING_GRADIENT_CLAMP)
            {
                d = -EXPLODING_GRADIENT_CLAMP;
            }
            else if (d > EXPLODING_GRADIENT_CLAMP)
            {
                d = EXPLODING_GRADIENT_CLAMP;
            }
            m += d * d; //accumulate squared gradient
            w[k] -= (learningRate * d) / (float)Math.Sqrt(m + SQRT_EPSILON);
            t[k] = m;
        }

        //biases
        t = biases[i];
        f = derivMem.biasMems[i];
        w = bias[i];
        k = f.Length;
        while (k-- > 0)
        {
            float m = t[k], d = f[k];
            if (d < -EXPLODING_GRADIENT_CLAMP)
            {
                d = -EXPLODING_GRADIENT_CLAMP;
            }
            else if (d > EXPLODING_GRADIENT_CLAMP)
            {
                d = EXPLODING_GRADIENT_CLAMP;
            }
            m += d * d;
            w[k] -= (learningRate * d) / (float)Math.Sqrt(m + SQRT_EPSILON);
            t[k] = m;
        }

        //recurring weights, if this layer has them
        t = i < recurringWeights.Length ? recurringWeights[i] : null;
        if (t != null)
        {
            f = derivMem.recurrWeightMems[i];
            w = recurrWeight[i];
            k = f.Length;
            while (k-- > 0)
            {
                float m = t[k], d = f[k];
                if (d < -EXPLODING_GRADIENT_CLAMP)
                {
                    d = -EXPLODING_GRADIENT_CLAMP;
                }
                else if (d > EXPLODING_GRADIENT_CLAMP)
                {
                    d = EXPLODING_GRADIENT_CLAMP;
                }
                m += d * d;
                w[k] -= (learningRate * d) / (float)Math.Sqrt(m + SQRT_EPSILON);
                t[k] = m;
            }
        }
    }
}
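//Minimal sketch of the adagrad update this method applies to each parameter
//(hypothetical helper, not used by the original code): the memory accumulates
//squared gradients, so the effective step shrinks for parameters that have
//already seen large gradients.
private float AdagradStep(float weight, float grad, ref float memory)
{
    //clamp the gradient to the exploding-gradient bound
    if (grad < -EXPLODING_GRADIENT_CLAMP)
    {
        grad = -EXPLODING_GRADIENT_CLAMP;
    }
    else if (grad > EXPLODING_GRADIENT_CLAMP)
    {
        grad = EXPLODING_GRADIENT_CLAMP;
    }
    memory += grad * grad; //accumulate squared gradients over time
    return weight - (learningRate * grad) / (float)Math.Sqrt(memory + SQRT_EPSILON);
}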
/// <summary>
/// Accumulate squared derivatives into the adagrad memory and apply the per-parameter update, targeting the weights/biases referenced by the propagation state itself.
/// </summary>
/// <param name="derivMem">Propagation state holding the accumulated derivatives.</param>
public void Apply(NeuralNetworkPropagationState derivMem)
{
    Apply(derivMem, derivMem.weights, derivMem.biases, derivMem.recurrWeights);
}