/// <summary>
/// Runs a forward pass through every layer of the network and returns the final output.
/// When <paramref name="zValues"/> is non-null, each layer is asked for its raw
/// (pre-activation) output, which is snapshotted into <paramref name="zValues"/>
/// before the activation function is applied here in place.
/// When <paramref name="activations"/> is non-null, each layer's activated output
/// buffer is recorded there (the live array, not a copy).
/// </summary>
internal float[] Compute(ComputeDevice mathLib, float[] input, ref List <float[]> activations, ref List <float[]> zValues, bool flushMathlibWorkingCache)
{
    // Recording z-values means the layers must emit raw weighted sums, so we hand
    // them a pass-through "activation" and run the real activation manually below.
    bool collectZValues = zValues != null;
    PasstroughActivation passthrough = collectZValues ? new PasstroughActivation() : null;

    var signal = input;
    foreach (var layer in layers)
    {
        signal = layer.Compute(mathLib, signal, collectZValues ? passthrough : activationFunction);

        if (collectZValues)
        {
            // Snapshot the raw sums first, then activate the buffer in place.
            zValues.Add((float[])signal.Clone());
            for (int n = 0; n < signal.Length; ++n)
            {
                signal[n] = activationFunction.Calculate(signal[n]);
            }
        }

        if (activations != null)
        {
            activations.Add(signal); // stores the live buffer, matching existing caller expectations
        }
    }

    if (flushMathlibWorkingCache)
    {
        mathLib.FlushWorkingCache();
    }

    return signal;
}
/// <summary>
/// Trains the network using the given training suite and calculator.
/// The function returns immediately with a promise object that can be used to monitor progress.
/// Note: Using the network during training is not permitted.
/// </summary>
/// <param name="trainingSuite">The training suite to be used</param>
/// <param name="calculator">The calculator (containing a compute device) to be used for calculations</param>
/// <returns>A promise that can be used to check the progress and completion of the training</returns>
/// <exception cref="InvalidOperationException">Thrown if a training session is already running</exception>
public TrainingPromise Train(TrainingSuite trainingSuite, ComputeDevice calculator)
{
    // Only one training session may run at a time; trainingPromise doubles as the "busy" flag.
    if (trainingPromise != null)
    {
        throw new InvalidOperationException("Cannot perform operation while training is in progress!");
    }

    trainingPromise = new TrainingPromise();

    // Zero epochs: report immediate completion without starting a worker thread.
    if (trainingSuite.config.epochs < 1)
    {
        trainingPromise.SetProgress(1, 0);
        return trainingPromise;
    }

    // All training happens on a background thread; the promise is the feedback channel.
    trainingThread = new Thread(() =>
    {
        for (int currentEpoch = 0; currentEpoch < trainingSuite.config.epochs; currentEpoch++)
        {
            // Honor a stop request between epochs (never mid-epoch).
            if (trainingPromise.IsStopAtNextEpoch())
            {
                break;
            }

            if (trainingSuite.config.shuffleTrainingData)
            {
                Utils.ShuffleList(ref trainingSuite.trainingData);
            }

            // First minibatch window; spans the whole data set when minibatches are disabled.
            int trainingDataBegin = 0;
            int trainingDataEnd = trainingSuite.config.UseMinibatches()
                ? Math.Min(trainingSuite.config.miniBatchSize, trainingSuite.trainingData.Count)
                : trainingSuite.trainingData.Count;

            while (true)
            {
                // Calculate the accumulated gradient. Accumulated means that the gradient has to
                // be divided by the number of samples in the minibatch.
                List<List<NeuronData>> accumulatedGradient =
                    calculator.CalculateAccumulatedGradientForMinibatch(this, trainingSuite, trainingDataBegin, trainingDataEnd);

                // NOTE(review): this scales the gradient by batchSize/totalCount rather than
                // 1/batchSize — presumably CalculateAccumulatedGradientForMinibatch accounts
                // for this; confirm against its implementation.
                float sizeDivisor = (float)(trainingDataEnd - trainingDataBegin) / (float)trainingSuite.trainingData.Count;

                // Regularization terms (cf. Nielsen, "Neural Networks and Deep Learning", ch. 3):
                //   L2: w <- (1 - eta*lambda/n) * w - eta * dC/dw
                //   L1: w <- w - eta*(lambda/n)*sign(w) - eta * dC/dw
                float regularizationTerm1 = 1.0f;
                float regularizationTerm2Base = 0.0f;
                if (trainingSuite.config.regularization == TrainingSuite.TrainingConfig.Regularization.L2)
                {
                    regularizationTerm1 = 1.0f - trainingSuite.config.learningRate * (trainingSuite.config.regularizationLambda / (float)trainingSuite.trainingData.Count);
                }
                else if (trainingSuite.config.regularization == TrainingSuite.TrainingConfig.Regularization.L1)
                {
                    // FIX: this term must be positive — it is *subtracted* below, scaled by
                    // sign(w), so weights shrink toward zero. The previous negated value made
                    // the subtraction a double negative, growing the weights instead.
                    regularizationTerm2Base = trainingSuite.config.learningRate * (trainingSuite.config.regularizationLambda / (float)trainingSuite.trainingData.Count);
                }
                bool applyRegularizationTerm2 = trainingSuite.config.regularization == TrainingSuite.TrainingConfig.Regularization.L1;

                // Apply accumulated gradient to network (gradient descent).
                float sizeDivisorAndLearningRate = sizeDivisor * trainingSuite.config.learningRate;
                for (int i = 0; i < layers.Count; ++i)
                {
                    var layer = layers[i];
                    var weightsPerNeuron = layer.GetWeightsPerNeuron();
                    var layerNeuronCount = layer.GetNeuronCount();
                    var weightMx = layer.weightMx;
                    var biases = layer.biases;
                    for (int j = 0; j < layerNeuronCount; ++j)
                    {
                        var layerGradientWeights = accumulatedGradient[i][j].weights;
                        biases[j] -= accumulatedGradient[i][j].bias * sizeDivisorAndLearningRate;
                        for (int w = 0; w < weightsPerNeuron; ++w)
                        {
                            // Weight decay (L2 term) plus the plain gradient step.
                            weightMx[j, w] = regularizationTerm1 * weightMx[j, w] - layerGradientWeights[w] * sizeDivisorAndLearningRate;
                            if (applyRegularizationTerm2)
                            {
                                // L1 shrinkage toward zero, applied to the already-updated weight.
                                weightMx[j, w] -= regularizationTerm2Base * Utils.Sign(weightMx[j, w]);
                            }
                        }
                    }
                }

                // Set up the next minibatch, or quit the loop if we're done.
                if (trainingSuite.config.UseMinibatches())
                {
                    if (trainingDataEnd >= trainingSuite.trainingData.Count)
                    {
                        break;
                    }
                    // Progress is the fraction of all samples processed across all epochs.
                    trainingPromise.SetProgress(((float)trainingDataEnd + ((float)currentEpoch * (float)trainingSuite.trainingData.Count)) / ((float)trainingSuite.trainingData.Count * (float)trainingSuite.config.epochs), currentEpoch + 1);
                    trainingDataBegin = trainingDataEnd;
                    trainingDataEnd = Math.Min(trainingDataEnd + trainingSuite.config.miniBatchSize, trainingSuite.trainingData.Count);
                }
                else
                {
                    break;
                }
            }
        }

        calculator.FlushWorkingCache(); // Release any cache that the mathLib has built up.
        trainingPromise.SetProgress(1, trainingPromise.GetEpochsDone()); // Report that the training is finished.
        trainingPromise = null; // Clear the busy flag so a new training session may start.
    });
    trainingThread.Start();
    return trainingPromise;
}
/// <summary>
/// Trains the network using the given training suite and calculator.
/// The function returns immediately with a promise object that can be used to monitor progress.
/// Note: Using the network during training is not permitted.
/// </summary>
/// <param name="trainingSuite">The training suite to be used</param>
/// <param name="calculator">The calculator (containing a compute device) to be used for calculations</param>
/// <returns>A promise that can be used to check the progress and completion of the training</returns>
public TrainingPromise Train(TrainingSuite trainingSuite, ComputeDevice calculator)
{
    // Only one training session may run at a time; trainingPromise doubles as the "busy" flag.
    if (trainingPromise != null)
    {
        throw new Exception("Cannot perform operation while training is in progress!");
    }
    trainingPromise = new TrainingPromise();

    // Zero epochs: report immediate completion without starting a worker thread.
    if (trainingSuite.config.epochs < 1)
    {
        trainingPromise.SetProgress(1, 0);
        return(trainingPromise);
    }

    // All training happens on a background thread; the promise is the only feedback channel.
    trainingThread = new Thread(() =>
    {
        // Evolution mode trains a whole population of networks, each seeded as a copy of this one.
        Network[] evolution_population = null;
        if (trainingSuite.config.trainingMode == TrainingConfig.TrainingMode.Evolution)
        {
            evolution_population = new Network[trainingSuite.config.evolutionPopulationSize];
            for (int i = 0; i < evolution_population.Length; ++i)
            {
                evolution_population[i] = new Network(this);
            }
        }

        for (int currentEpoch = 0; currentEpoch < trainingSuite.config.epochs; currentEpoch++)
        {
            // Honor a stop request between epochs (never mid-epoch).
            if (trainingPromise.IsStopAtNextEpoch())
            {
                break;
            }

            if (trainingSuite.config.shuffleTrainingData)
            {
                Utils.ShuffleList(ref trainingSuite.trainingData);
            }

            // First minibatch window; spans the whole data set when minibatches are disabled.
            int trainingDataBegin = 0;
            int trainingDataEnd = trainingSuite.config.UseMinibatches() ?
                Math.Min(trainingSuite.config.miniBatchSize, trainingSuite.trainingData.Count) : trainingSuite.trainingData.Count;

            while (true)
            {
                // Train on the current [trainingDataBegin, trainingDataEnd) window using the configured mode.
                switch (trainingSuite.config.trainingMode)
                {
                    case TrainingConfig.TrainingMode.Backpropagation:
                        TrainWithBackpropagation(trainingSuite, trainingDataBegin, trainingDataEnd, calculator);
                        break;
                    case TrainingConfig.TrainingMode.Evolution:
                        TrainWithEvolution(trainingSuite, trainingDataBegin, trainingDataEnd, evolution_population, calculator);
                        break;
                    default:
                        //error
                        // NOTE(review): an unknown training mode is silently skipped here —
                        // consider surfacing this through the promise instead.
                        break;
                }

                //Set up the next minibatch, or quit the loop if we're done.
                if (trainingSuite.config.UseMinibatches())
                {
                    if (trainingDataEnd >= trainingSuite.trainingData.Count)
                    {
                        break;
                    }
                    // Progress is the fraction of all samples processed across all epochs.
                    trainingPromise.SetProgress(((float)trainingDataEnd + ((float)currentEpoch * (float)trainingSuite.trainingData.Count)) / ((float)trainingSuite.trainingData.Count * (float)trainingSuite.config.epochs), currentEpoch + 1);
                    trainingDataBegin = trainingDataEnd;
                    trainingDataEnd = Math.Min(trainingDataEnd + trainingSuite.config.miniBatchSize, trainingSuite.trainingData.Count);
                }
                else
                {
                    break;
                }
            }
        }

        calculator.FlushWorkingCache(); //Release any cache that the mathLib has built up.

        if (trainingSuite.config.trainingMode == TrainingConfig.TrainingMode.Evolution)
        {
            // Adopt the winning network's layers directly, without copying.
            // NOTE(review): assumes TrainWithEvolution leaves the population ordered
            // best-first so index 0 is the best performer — confirm in that method.
            this.layers = evolution_population[0].layers; //move the best performing layer without copying
            evolution_population = null;
        }

        trainingPromise.SetProgress(1, trainingPromise.GetEpochsDone()); //Report that the training is finished
        trainingPromise = null; // Clear the busy flag so a new training session may start.
    });
    trainingThread.Start();
    return(trainingPromise);
}