/// <summary>
/// Create a new NeuralNetworkTrainer.
/// </summary>
/// <param name="nn">NeuralNetwork to train.</param>
/// <param name="inputDat">Input data.</param>
/// <param name="targetDat">Target data.</param>
/// <param name="maxUnrollLen">Maximum number of time steps to unroll the memory state of recurring layers.</param>
/// <param name="losType">Loss calculation type: NeuralNetworkTrainer.LOSS_TYPE_AVERAGE, LOSS_TYPE_MAX or LOSS_TYPE_CROSSENTROPY.</param>
public NeuralNetworkTrainer(NeuralNetwork nn, float[][] inputDat, float[][] targetDat, int maxUnrollLen, int losType)
{
    neuralNetwork = nn;
    inputData = inputDat;
    targetData = targetDat;
    maxUnrollLength = maxUnrollLen;
    if (maxUnrollLength < 1)
    {
        maxUnrollLength = 1;
    }
    lossType = losType;

    //check for recurring layers to decide whether contexts must be stacked and unrolled
    if (nn.outputLayer.recurring)
    {
        hasRecurring = true;
    }
    else
    {
        for (int i = 0; i < nn.hiddenLayers.Length; i++)
        {
            if (nn.hiddenLayers[i].recurring)
            {
                hasRecurring = true;
                break;
            }
        }
    }

    derivatives.Setup(nn);
    adagradMemory.Setup(nn);
    adagradMemory.Reset();

    //non-recurring networks need only a single context
    int tunrollLen = hasRecurring ? maxUnrollLength : 1;

    stackedRuntimeContext = new NeuralNetworkContext[tunrollLen];
    stackedFullContext = new NeuralNetworkFullContext[tunrollLen];
    stackedDerivativeMemory = new NeuralNetworkPropagationState[tunrollLen];

    for (int i = 0; i < stackedRuntimeContext.Length; i++)
    {
        stackedRuntimeContext[i] = new NeuralNetworkContext();
        stackedRuntimeContext[i].Setup(nn);
        stackedFullContext[i] = new NeuralNetworkFullContext();
        stackedFullContext[i].Setup(nn);
        stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
        stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
    }
}
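// Usage sketch (hypothetical, not part of the library): builds a trainer for a
// network constructed elsewhere. How the NeuralNetwork itself is created is not
// shown in this file, so it is passed in; the data shapes are illustrative only.
public static NeuralNetworkTrainer CreateExampleTrainer(NeuralNetwork net)
{
    //two tiny input/target pairs standing in for a real training set
    float[][] inputs = { new float[] { 0f, 1f }, new float[] { 1f, 0f } };
    float[][] targets = { new float[] { 1f }, new float[] { 0f } };

    //unroll recurring layers up to 8 steps; report the average absolute loss
    return new NeuralNetworkTrainer(net, inputs, targets, 8,
        NeuralNetworkTrainer.LOSS_TYPE_AVERAGE);
}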
/// <summary>
/// Create a new NeuralNetworkQLearning system.
/// </summary>
/// <param name="nn">NeuralNetwork to train.</param>
/// <param name="maxUnrollLen">Maximum number of time steps to unroll the memory state of recurring layers.</param>
/// <param name="sessionsFileName">File used to store learning sessions.</param>
public NeuralNetworkQLearning(NeuralNetwork nn, int maxUnrollLen, string sessionsFileName)
{
    neuralNetwork = nn;
    learningSessionsFile = sessionsFileName;
    maxUnrollLength = maxUnrollLen;
    if (maxUnrollLength < 1)
    {
        maxUnrollLength = 1;
    }

    //check for recurring layers to decide whether contexts must be stacked and unrolled
    for (int i = 0; i < nn.hiddenLayers.Length; i++)
    {
        if (nn.hiddenLayers[i].recurring)
        {
            hasRecurring = true;
            break;
        }
    }

    derivatives.Setup(nn);

    //non-recurring networks need only a single context
    if (!hasRecurring)
    {
        maxUnrollLength = 1;
    }

    stackedRuntimeContext = new NeuralNetworkContext[maxUnrollLength];
    stackedFullContext = new NeuralNetworkFullContext[maxUnrollLength];
    stackedDerivativeMemory = new NeuralNetworkPropagationState[maxUnrollLength];

    for (int i = 0; i < stackedRuntimeContext.Length; i++)
    {
        stackedRuntimeContext[i] = new NeuralNetworkContext();
        stackedRuntimeContext[i].Setup(nn);
        stackedFullContext[i] = new NeuralNetworkFullContext();
        stackedFullContext[i].Setup(nn);
        stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
        stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
    }

    /*
    if (hasRecurring)
    {
        recurringMemoryState = new float[nn.hiddenLayers.Length][];
        for (int i = 0; i < nn.hiddenLayers.Length; i++)
        {
            if (nn.hiddenLayers[i].recurring)
            {
                recurringMemoryState[i] = new float[nn.hiddenLayers[i].numberOfNeurons];
            }
        }
    }
    */
}
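// Usage sketch (hypothetical): "sessions.dat" is an illustrative file name; the
// constructor only stores it, and the read/write logic lives elsewhere.
public static NeuralNetworkQLearning CreateExampleQLearner(NeuralNetwork net)
{
    //16-step unroll cap; the constructor clamps this to 1 when the
    //network has no recurring layers
    return new NeuralNetworkQLearning(net, 16, "sessions.dat");
}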
/// <summary>
/// Bind this propagation state to a network and its runtime/full contexts,
/// sharing the derivative accumulation buffers held in derivMem.
/// </summary>
public void Setup(NeuralNetwork nn, NeuralNetworkContext context, NeuralNetworkFullContext fullCtx, NeuralNetworkDerivativeMemory derivMem)
{
    //initialize memory buffers
    state = new float[nn.hiddenLayers.Length][];
    weights = new float[nn.hiddenLayers.Length + 1][];
    biases = new float[nn.hiddenLayers.Length + 1][];
    buf = new float[nn.hiddenLayers.Length + 1][];
    recurrBuf = new float[nn.hiddenLayers.Length][];

    biasMems = derivMem.biasMems;
    weightMems = derivMem.weightMems;
    recurrWeightMems = derivMem.recurrWeightMems;
    recurrWeights = new float[nn.hiddenLayers.Length][];
    derivativeMemory = derivMem;

    for (int i = 0; i < nn.hiddenLayers.Length; i++)
    {
        state[i] = new float[nn.hiddenLayers[i].numberOfNeurons];
        weights[i] = nn.hiddenConnections[i].weights;
        biases[i] = nn.hiddenLayers[i].biases;

        //each layer reads its input from the previous layer's output buffer,
        //except the first hidden layer, which reads the network input
        if (i == 0)
        {
            buf[i] = context.inputData;
        }
        else
        {
            buf[i] = fullCtx.hiddenBuffer[i - 1];
        }

        if (nn.hiddenLayers[i].recurring)
        {
            recurrWeights[i] = nn.hiddenRecurringConnections[i].weights;
            recurrBuf[i] = fullCtx.hiddenRecurringBuffer[i];
        }
    }

    //the output layer occupies the slot after the hidden layers
    int lid = nn.hiddenLayers.Length;
    weights[lid] = nn.outputConnection.weights;
    biases[lid] = nn.outputLayer.biases;
    if (lid > 0)
    {
        buf[lid] = fullCtx.hiddenBuffer[lid - 1];
    }
    else
    {
        buf[lid] = context.inputData;
    }
}
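// Wiring sketch (hypothetical): how one runtime/full-context pair is attached to
// a propagation state, mirroring the loops in the constructors above. All Setup
// calls appear in this file; only the wrapper method itself is illustrative.
public static NeuralNetworkPropagationState BindExampleState(NeuralNetwork net,
    NeuralNetworkDerivativeMemory sharedDerivatives)
{
    NeuralNetworkContext ctx = new NeuralNetworkContext();
    ctx.Setup(net);
    NeuralNetworkFullContext fullCtx = new NeuralNetworkFullContext();
    fullCtx.Setup(net);

    //the propagation state aliases the network's weight/bias arrays and the
    //context buffers, so no per-step copying is needed during backpropagation
    NeuralNetworkPropagationState propState = new NeuralNetworkPropagationState();
    propState.Setup(net, ctx, fullCtx, sharedDerivatives);
    return propState;
}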
/// <summary>
/// Create a new NeuralNetworkGenerator.
/// </summary>
/// <param name="nn">NeuralNetwork to train.</param>
/// <param name="maxUnrollLen">Maximum number of time steps to unroll the memory state of recurring layers.</param>
public NeuralNetworkGenerator(NeuralNetwork nn, int maxUnrollLen)
{
    neuralNetwork = nn;
    maxUnrollLength = maxUnrollLen;
    if (maxUnrollLength < 1)
    {
        maxUnrollLength = 1;
    }

    //check for recurring layers to decide whether contexts must be stacked and unrolled
    for (int i = 0; i < nn.hiddenLayers.Length; i++)
    {
        if (nn.hiddenLayers[i].recurring)
        {
            hasRecurring = true;
            break;
        }
    }

    derivatives.Setup(nn);

    if (hasRecurring)
    {
        //allocate a backpropagation buffer for every recurring hidden layer
        recurringBPBuffer = new float[nn.hiddenLayers.Length][];
        for (int i = 0; i < recurringBPBuffer.Length; i++)
        {
            if (nn.hiddenLayers[i].recurring)
            {
                recurringBPBuffer[i] = new float[nn.hiddenLayers[i].numberOfNeurons];
            }
        }
    }
    else
    {
        //non-recurring networks need only a single context
        maxUnrollLength = 1;
    }

    stackedRuntimeContext = new NeuralNetworkContext[maxUnrollLength];
    stackedFullContext = new NeuralNetworkFullContext[maxUnrollLength];
    stackedDerivativeMemory = new NeuralNetworkPropagationState[maxUnrollLength];

    for (int i = 0; i < maxUnrollLength; i++)
    {
        stackedRuntimeContext[i] = new NeuralNetworkContext();
        stackedRuntimeContext[i].Setup(nn);
        stackedFullContext[i] = new NeuralNetworkFullContext();
        stackedFullContext[i].Setup(nn);
        stackedDerivativeMemory[i] = new NeuralNetworkPropagationState();
        stackedDerivativeMemory[i].Setup(nn, stackedRuntimeContext[i], stackedFullContext[i], derivatives);
        stackedDerivativeMemory[i].inputMem = new float[nn.inputLayer.numberOfNeurons];
    }
}
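// Usage sketch (hypothetical): a generator unrolled up to 32 steps, e.g. for
// producing sequences from a recurrent network one element at a time. The unroll
// count is forced to 1 internally when the network has no recurring layers.
public static NeuralNetworkGenerator CreateExampleGenerator(NeuralNetwork net)
{
    return new NeuralNetworkGenerator(net, 32);
}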
/// <summary>
/// Run the neural network backwards, calculating derivatives to use for adagrad or generation.
/// </summary>
/// <param name="target">Expected output values for this step.</param>
/// <param name="context">Runtime context holding the forward-pass results.</param>
/// <param name="fullContext">Intermediate activations saved by Execute_FullContext.</param>
/// <param name="propState">Propagation state that receives the computed derivatives and loss.</param>
/// <param name="lossType">Loss calculation type: NeuralNetworkTrainer.LOSS_TYPE_AVERAGE, LOSS_TYPE_MAX or LOSS_TYPE_CROSSENTROPY.</param>
/// <param name="crossEntropyTarget">Index of the correct class for cross-entropy loss, or -1 to skip it.</param>
public void ExecuteBackwards(float[] target, NeuralNetworkContext context, NeuralNetworkFullContext fullContext, NeuralNetworkPropagationState propState, int lossType, int crossEntropyTarget)
{
    //prepare for back propagation
    for (int i = 0; i < propState.state.Length; i++)
    {
        Utils.Fill(propState.state[i], 0.0f);
    }

    //back propagate the output error and accumulate the loss
    int lid = hiddenLayers.Length;
    float lossAvg = 0.0f;
    for (int i = 0; i < target.Length; i++)
    {
        float deriv = context.outputData[i] - target[i];
        if (lossType == NeuralNetworkTrainer.LOSS_TYPE_MAX)
        {
            float aderiv = Math.Abs(deriv);
            if (aderiv > lossAvg)
            {
                lossAvg = aderiv;
            }
        }
        else if (lossType == NeuralNetworkTrainer.LOSS_TYPE_AVERAGE)
        {
            lossAvg += Math.Abs(deriv);
        }
        backpropagate(lid, i, deriv, propState);
    }

    if (lossType == NeuralNetworkTrainer.LOSS_TYPE_AVERAGE)
    {
        lossAvg /= (float)target.Length;
    }
    else if (lossType == NeuralNetworkTrainer.LOSS_TYPE_CROSSENTROPY && crossEntropyTarget != -1)
    {
        //negative log-likelihood of the correct class; clamp the infinity from log(0)
        lossAvg = (float)-Math.Log(context.outputData[crossEntropyTarget]);
        if (float.IsInfinity(lossAvg))
        {
            lossAvg = 1e8f;
        }
    }
    propState.loss = lossAvg;

    propState.derivativeMemory.SwapBPBuffers();

    //walk the hidden layers from last to first, propagating the accumulated error
    int k = lid;
    while (k-- > 0)
    {
        int l = hiddenLayers[k].numberOfNeurons;
        while (l-- > 0)
        {
            backpropagate(k, l, propState.state[k][l], propState);
        }
    }
}
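// Call sketch (hypothetical): one backward pass per unrolled step after forward
// passes have filled the stacked contexts. The newest-to-oldest order and the -1
// crossEntropyTarget (which disables the cross-entropy branch) are assumptions
// about how a trainer would drive this method.
public static float BackwardsOverUnroll(NeuralNetwork net, float[][] targetSeq,
    NeuralNetworkContext[] ctxs, NeuralNetworkFullContext[] fullCtxs,
    NeuralNetworkPropagationState[] propStates)
{
    float totalLoss = 0.0f;
    for (int step = ctxs.Length - 1; step >= 0; step--)
    {
        net.ExecuteBackwards(targetSeq[step], ctxs[step], fullCtxs[step],
            propStates[step], NeuralNetworkTrainer.LOSS_TYPE_AVERAGE, -1);
        totalLoss += propStates[step].loss;
    }
    return totalLoss / ctxs.Length;
}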
/// <summary>
/// Execute the neural network and save all intermediate results in fullContext for adagrad training.
/// </summary>
/// <param name="context">Runtime context supplying inputData and receiving outputData.</param>
/// <param name="fullContext">Buffer that receives every layer's activations for later backpropagation.</param>
public void Execute_FullContext(NeuralNetworkContext context, NeuralNetworkFullContext fullContext)
{
    float[] input = context.inputData, output = context.outputData, hidden = context.hiddenData;
    float[][] hiddenRecurring = context.hiddenRecurringData;
    int i, weightIndex, recurringWeightIndex;
    NeuronActivationFunction activeFunc;

    if (hiddenLayers.Length > 0)
    {
        int lastNumNeurons = 0;
        float[] weights, biases, recurringWeights;
        for (i = 0; i < hiddenLayers.Length; i++)
        {
            weights = hiddenConnections[i].weights;
            biases = hiddenLayers[i].biases;
            activeFunc = hiddenLayers[i].activationFunction;

            //the first hidden layer reads the network input;
            //later layers read the previous layer's output
            float[] ina;
            int alen;
            if (i == 0)
            {
                ina = input;
                alen = input.Length;
            }
            else
            {
                ina = hidden;
                alen = lastNumNeurons;
            }

            if (hiddenLayers[i].recurring)
            {
                //recurring hidden layer
                float[] hrec = hiddenRecurring[i];
                recurringWeights = hiddenRecurringConnections[i].weights;

                //copy over data needed for training
                Array.Copy(hrec, fullContext.hiddenRecurringBuffer[i], hrec.Length);

                weightIndex = 0;
                recurringWeightIndex = 0;
                int k = biases.Length;
                while (k-- > 0)
                {
                    float ov = biases[k];
                    int j = alen;
                    while (j-- > 0)
                    {
                        ov += ina[j] * weights[weightIndex++];
                    }
                    j = hrec.Length;
                    while (j-- > 0)
                    {
                        ov += hrec[j] * recurringWeights[recurringWeightIndex++];
                    }
                    hidden[k] = activeFunc(ov);
                }
                Array.Copy(hidden, hrec, biases.Length);
            }
            else
            {
                //non-recurring hidden layer
                weightIndex = 0;
                int k = biases.Length;
                while (k-- > 0)
                {
                    float ov = biases[k];
                    int j = alen;
                    while (j-- > 0)
                    {
                        ov += ina[j] * weights[weightIndex++];
                    }
                    hidden[k] = activeFunc(ov);
                }
            }

            Array.Copy(hidden, fullContext.hiddenBuffer[i], biases.Length);
            lastNumNeurons = biases.Length;
        }

        //run the last hidden layer into the output layer
        activeFunc = outputLayer.activationFunction;
        weights = outputConnection.weights;
        biases = outputLayer.biases;
        weightIndex = 0;
        i = output.Length;
        while (i-- > 0)
        {
            float ov = biases[i];
            int k = lastNumNeurons;
            while (k-- > 0)
            {
                ov += hidden[k] * weights[weightIndex++];
            }
            output[i] = activeFunc(ov);
        }
    }
    else
    {
        //no hidden layers: run the input straight into the output layer
        activeFunc = outputLayer.activationFunction;
        float[] weights = outputConnection.weights, biases = outputLayer.biases;
        weightIndex = 0;
        i = output.Length;
        while (i-- > 0)
        {
            float ov = biases[i];
            int k = input.Length;
            while (k-- > 0)
            {
                ov += input[k] * weights[weightIndex++];
            }
            output[i] = activeFunc(ov);
        }
    }
}
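// Call sketch (hypothetical): one forward step that also records the intermediate
// activations ExecuteBackwards needs later. The inputData/outputData field names
// come from this file; the sample array and its length matching the input layer
// are assumptions.
public static float[] ForwardStep(NeuralNetwork net, NeuralNetworkContext ctx,
    NeuralNetworkFullContext fullCtx, float[] sample)
{
    //load the inputs, run the network, read the outputs
    Array.Copy(sample, ctx.inputData, sample.Length);
    net.Execute_FullContext(ctx, fullCtx);
    return ctx.outputData;
}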