public static INeuralNetwork TryLoad([NotNull] Stream stream, ExecutionModePreference preference)
{
    try
    {
        using (GZipStream gzip = new GZipStream(stream, CompressionMode.Decompress))
        {
            if (!gzip.TryRead(out NetworkType model)) return null;
            switch (model)
            {
                case NetworkType.Sequential: return SequentialNetwork.Deserialize(gzip, preference);
                case NetworkType.ComputationGraph: return ComputationGraphNetwork.Deserialize(gzip, preference);
                default: return null;
            }
        }
    }
    catch
    {
        // Locked or invalid file
        return null;
    }
}
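A minimal usage sketch for the loader above; the enclosing NetworkLoader class name, the ExecutionModePreference.Cpu member and the file name are illustrative assumptions, not taken from this snippet:

// Hypothetical caller: tries to restore a previously saved network from disk
using (FileStream stream = File.OpenRead("network.nnet"))
{
    INeuralNetwork network = NetworkLoader.TryLoad(stream, ExecutionModePreference.Cpu);
    if (network == null)
    {
        // The stream was locked, truncated or not a serialized network:
        // TryLoad swallows the exception and signals failure by returning null
    }
}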
public static WeightsUpdater RMSProp([NotNull] RMSPropInfo info, [NotNull] SequentialNetwork network)
{
    // Setup
    float eta = info.Eta, rho = info.Rho, lambda = info.Lambda, epsilon = info.Epsilon;
    float[][]
        mW = new float[network.WeightedLayersIndexes.Length][],
        mB = new float[network.WeightedLayersIndexes.Length][];
    for (int i = 0; i < network.WeightedLayersIndexes.Length; i++)
    {
        WeightedLayerBase layer = network._Layers[network.WeightedLayersIndexes[i]].To<NetworkLayerBase, WeightedLayerBase>();
        mW[i] = new float[layer.Weights.Length];
        mB[i] = new float[layer.Biases.Length];
    }

    // Closure
    unsafe void Minimize(int i, in Tensor dJdw, in Tensor dJdb, int samples, WeightedLayerBase layer)
    {
        // Tweak the weights
        float
            alpha = eta / samples,
            l2Factor = eta * lambda / samples;
        fixed (float* pw = layer.Weights, pmw = mW[i])
        {
            float* pdj = dJdw;
            int w = layer.Weights.Length;
            for (int x = 0; x < w; x++)
            {
                float pdJi = pdj[x];
                pmw[x] = rho * pmw[x] + (1 - rho) * pdJi * pdJi;
                pw[x] -= l2Factor * pw[x] + alpha * pdJi / ((float)Math.Sqrt(pmw[x]) + epsilon);
            }
        }

        // Tweak the biases of the i-th layer
        fixed (float* pb = layer.Biases, pmb = mB[i])
        {
            float* pdj = dJdb;
            int w = layer.Biases.Length;
            for (int b = 0; b < w; b++)
            {
                float pdJi = pdj[b];
                pmb[b] = rho * pmb[b] + (1 - rho) * pdJi * pdJi;
                pb[b] -= alpha * pdJi / ((float)Math.Sqrt(pmb[b]) + epsilon);
            }
        }
    }

    return Minimize;
}
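For reference, the two loops above implement the standard RMSProp step with the L2 penalty folded into the weights update (same symbols as the code: eta = learning rate, rho = decay rate, lambda = L2 factor, n = samples per mini-batch, g = gradient, m = the running average stored in mW/mB):

    m = rho * m + (1 - rho) * g^2
    w = w - (eta * lambda / n) * w - (eta / n) * g / (sqrt(m) + epsilon)

Biases follow the same rule without the L2 term.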
private static TrainingSessionResult Optimize(
    SequentialNetwork network,
    BatchesCollection miniBatches,
    int epochs, float dropout,
    [NotNull] WeightsUpdater updater,
    [CanBeNull] IProgress<BatchProgress> batchProgress,
    [CanBeNull] IProgress<TrainingProgressEventArgs> trainingProgress,
    [CanBeNull] ValidationDataset validationDataset,
    [CanBeNull] TestDataset testDataset,
    CancellationToken token)
{
    // Setup
    DateTime startTime = DateTime.Now;
    List<DatasetEvaluationResult>
        validationReports = new List<DatasetEvaluationResult>(),
        testReports = new List<DatasetEvaluationResult>();
    TrainingSessionResult PrepareResult(TrainingStopReason reason, int loops)
    {
        return new TrainingSessionResult(reason, loops, DateTime.Now.Subtract(startTime).RoundToSeconds(), validationReports, testReports);
    }

    // Convergence manager for the validation dataset
    RelativeConvergence convergence = validationDataset == null
        ? null
        : new RelativeConvergence(validationDataset.Tolerance, validationDataset.EpochsInterval);

    // Optional batch monitor
    BatchProgressMonitor batchMonitor = batchProgress == null ? null : new BatchProgressMonitor(miniBatches.Count, batchProgress);

    // Run the training epochs
    for (int i = 0; i < epochs; i++)
    {
        // Shuffle the training set
        miniBatches.CrossShuffle();

        // Gradient descent over the current batches
        for (int j = 0; j < miniBatches.BatchesCount; j++)
        {
            if (token.IsCancellationRequested) return PrepareResult(TrainingStopReason.TrainingCanceled, i);
            network.Backpropagate(miniBatches.Batches[j], dropout, updater);
            batchMonitor?.NotifyCompletedBatch(miniBatches.Batches[j].X.GetLength(0));
        }
        batchMonitor?.Reset();

        // Check for overflows
        if (!Parallel.For(0, network._Layers.Length, (j, state) =>
        {
            if (network._Layers[j] is WeightedLayerBase layer && !layer.ValidateWeights()) state.Break();
        }).IsCompleted)
        {
            // Assumed continuation: the snippet is truncated at this point, and a NumericOverflow
            // stop reason is the natural counterpart to the ValidateWeights() check above
            return PrepareResult(TrainingStopReason.NumericOverflow, i);
        }

        // ... (validation/test evaluation and progress reporting omitted in this excerpt)
    }
    return PrepareResult(TrainingStopReason.EpochsCompleted, epochs);
}
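Because the inner loop polls token.IsCancellationRequested once per mini-batch, training stops cleanly at batch granularity: no batch is left half-applied, and the reports gathered so far are still returned through PrepareResult. A caller only needs a standard CancellationTokenSource; this is a sketch of that pattern (the StopReason property name is an assumption, inferred from the TrainingSessionResult constructor above):

// Cancel automatically after a time budget, or call cts.Cancel() manually from another thread
CancellationTokenSource cts = new CancellationTokenSource(TimeSpan.FromMinutes(5));
TrainingSessionResult result = TrainNetwork(network, batches, 100, 0f, algorithm, null, null, null, null, cts.Token);
bool canceled = result.StopReason == TrainingStopReason.TrainingCanceled;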
public static TrainingSessionResult TrainNetwork(
    [NotNull] SequentialNetwork network,
    [NotNull] BatchesCollection batches,
    int epochs, float dropout,
    [NotNull] ITrainingAlgorithmInfo algorithm,
    [CanBeNull] IProgress<BatchProgress> batchProgress,
    [CanBeNull] IProgress<TrainingProgressEventArgs> trainingProgress,
    [CanBeNull] ValidationDataset validationDataset,
    [CanBeNull] TestDataset testDataset,
    CancellationToken token)
{
    SharedEventsService.TrainingStarting.Raise();
    WeightsUpdater optimizer;
    switch (algorithm)
    {
        /* =================
         * Optimization
         * =================
         * The right optimizer is selected here, and the captured closure for each of them also holds local temporary data, if needed.
         * That temporary data is managed, so it will be collected automatically by the GC and no extra callback
         * is needed to clean up unmanaged resources when the training stops. */
        case MomentumInfo momentum:
            optimizer = WeightsUpdaters.Momentum(momentum, network);
            break;
        case StochasticGradientDescentInfo sgd:
            optimizer = WeightsUpdaters.StochasticGradientDescent(sgd);
            break;
        case AdaGradInfo adagrad:
            optimizer = WeightsUpdaters.AdaGrad(adagrad, network);
            break;
        case AdaDeltaInfo adadelta:
            optimizer = WeightsUpdaters.AdaDelta(adadelta, network);
            break;
        case AdamInfo adam:
            optimizer = WeightsUpdaters.Adam(adam, network);
            break;
        case AdaMaxInfo adamax:
            optimizer = WeightsUpdaters.AdaMax(adamax, network);
            break;
        case RMSPropInfo rms:
            optimizer = WeightsUpdaters.RMSProp(rms, network);
            break;
        default:
            throw new ArgumentException("The input training algorithm type is not supported");
    }
    return Optimize(network, batches, epochs, dropout, optimizer, batchProgress, trainingProgress, validationDataset, testDataset, token);
}
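A hedged example of invoking the dispatcher; how an ITrainingAlgorithmInfo instance is constructed is not shown in this snippet, so the algorithm variable below is left as a placeholder:

ITrainingAlgorithmInfo algorithm = /* e.g. an AdamInfo with the desired hyperparameters */;

// Train for 10 epochs with 20% dropout and no progress reporting or validation/test data;
// the switch above maps the info type to the matching WeightsUpdater closure
TrainingSessionResult result = TrainNetwork(
    network, batches, 10, 0.2f, algorithm,
    null, null, null, null, CancellationToken.None);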
public static WeightsUpdater Momentum([NotNull] MomentumInfo info, [NotNull] SequentialNetwork network)
{
    // Setup
    float eta = info.Eta, lambda = info.Lambda, momentum = info.Momentum;
    float[][]
        mW = new float[network.WeightedLayersIndexes.Length][],
        mB = new float[network.WeightedLayersIndexes.Length][];
    for (int i = 0; i < network.WeightedLayersIndexes.Length; i++)
    {
        WeightedLayerBase layer = network._Layers[network.WeightedLayersIndexes[i]].To<NetworkLayerBase, WeightedLayerBase>();
        mW[i] = new float[layer.Weights.Length];
        mB[i] = new float[layer.Biases.Length];
    }

    // Closure
    unsafe void Minimize(int i, in Tensor dJdw, in Tensor dJdb, int samples, WeightedLayerBase layer)
    {
        // Tweak the weights
        float
            alpha = eta / samples,
            l2Factor = eta * lambda / samples;
        fixed (float* pw = layer.Weights, pmw = mW[i])
        {
            float* pdj = dJdw;
            int w = layer.Weights.Length;
            for (int x = 0; x < w; x++)
            {
                pmw[x] = momentum * pmw[x] + pdj[x];
                pw[x] -= l2Factor * pw[x] + alpha * pmw[x];
            }
        }

        // Tweak the biases of the i-th layer
        fixed (float* pb = layer.Biases, pmb = mB[i])
        {
            float* pdj = dJdb;
            int w = layer.Biases.Length;
            for (int b = 0; b < w; b++)
            {
                pmb[b] = momentum * pmb[b] + pdj[b];

                // Apply the accumulated velocity, consistently with the weights update above
                // (the original used the raw gradient pdj[b] here, leaving pmb unused)
                pb[b] -= alpha * pmb[b];
            }
        }
    }

    return Minimize;
}
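The update implemented above, in the usual momentum form (mu = momentum coefficient, v = the per-parameter velocity stored in mW/mB):

    v = mu * v + g
    w = w - (eta * lambda / n) * w - (eta / n) * v

The L2 term again applies to weights only, and with mu = 0 this reduces to plain stochastic gradient descent.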
public static WeightsUpdater AdaMax([NotNull] AdaMaxInfo info, [NotNull] SequentialNetwork network)
{
    // Initialize the AdaMax parameters
    float eta = info.Eta, beta1 = info.Beta1, beta2 = info.Beta2;
    float[][]
        mW = new float[network.WeightedLayersIndexes.Length][],
        uW = new float[network.WeightedLayersIndexes.Length][],
        mB = new float[network.WeightedLayersIndexes.Length][],
        uB = new float[network.WeightedLayersIndexes.Length][];
    float[] beta1t = new float[network.WeightedLayersIndexes.Length];
    for (int i = 0; i < network.WeightedLayersIndexes.Length; i++)
    {
        WeightedLayerBase layer = network._Layers[network.WeightedLayersIndexes[i]].To<NetworkLayerBase, WeightedLayerBase>();
        mW[i] = new float[layer.Weights.Length];
        uW[i] = new float[layer.Weights.Length];
        mB[i] = new float[layer.Biases.Length];
        uB[i] = new float[layer.Biases.Length];
        beta1t[i] = beta1;
    }

    // AdaMax update for weights and biases
    unsafe void Minimize(int i, in Tensor dJdw, in Tensor dJdb, int samples, WeightedLayerBase layer)
    {
        // First-moment decay factor beta1^t at timestep t
        float b1t = beta1t[i];
        beta1t[i] *= beta1;

        // Weights
        fixed (float* pw = layer.Weights, pm = mW[i], pu = uW[i])
        {
            float* pdJ = dJdw;
            int w = layer.Weights.Length;
            for (int x = 0; x < w; x++)
            {
                float pdJi = pdJ[x];
                pm[x] = beta1 * pm[x] + (1 - beta1) * pdJi;
                pu[x] = (beta2 * pu[x]).Max(pdJi.Abs());
                pw[x] -= eta / (1 - b1t) * pm[x] / pu[x];
            }
        }

        // Biases
        fixed (float* pb = layer.Biases, pm = mB[i], pu = uB[i])
        {
            float* pdJ = dJdb;
            int w = layer.Biases.Length;
            for (int b = 0; b < w; b++)
            {
                float pdJi = pdJ[b];
                pm[b] = beta1 * pm[b] + (1 - beta1) * pdJi;
                pu[b] = (beta2 * pu[b]).Max(pdJi.Abs());
                pb[b] -= eta / (1 - b1t) * pm[b] / pu[b];
            }
        }
    }

    return Minimize;
}

#endregion
}
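The loops above follow the AdaMax rule (the infinity-norm variant of Adam), where u replaces Adam's second moment with a running max and only the first moment needs bias correction:

    m = beta1 * m + (1 - beta1) * g
    u = max(beta2 * u, |g|)
    w = w - eta / (1 - beta1^t) * m / u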
public static WeightsUpdater Adam([NotNull] AdamInfo info, [NotNull] SequentialNetwork network)
{
    // Initialize the Adam parameters
    float eta = info.Eta, beta1 = info.Beta1, beta2 = info.Beta2, epsilon = info.Epsilon;
    float[][]
        mW = new float[network.WeightedLayersIndexes.Length][],
        vW = new float[network.WeightedLayersIndexes.Length][],
        mB = new float[network.WeightedLayersIndexes.Length][],
        vB = new float[network.WeightedLayersIndexes.Length][];
    float[]
        beta1t = new float[network.WeightedLayersIndexes.Length],
        beta2t = new float[network.WeightedLayersIndexes.Length];
    for (int i = 0; i < network.WeightedLayersIndexes.Length; i++)
    {
        WeightedLayerBase layer = network._Layers[network.WeightedLayersIndexes[i]].To<NetworkLayerBase, WeightedLayerBase>();
        mW[i] = new float[layer.Weights.Length];
        vW[i] = new float[layer.Weights.Length];
        mB[i] = new float[layer.Biases.Length];
        vB[i] = new float[layer.Biases.Length];
        beta1t[i] = beta1;
        beta2t[i] = beta2;
    }

    // Adam update for weights and biases
    unsafe void Minimize(int i, in Tensor dJdw, in Tensor dJdb, int samples, WeightedLayerBase layer)
    {
        // Bias-corrected alpha at timestep t
        float alphat = eta * (float)Math.Sqrt(1 - beta2t[i]) / (1 - beta1t[i]);
        beta1t[i] *= beta1;
        beta2t[i] *= beta2;

        // Weights
        fixed (float* pw = layer.Weights, pm = mW[i], pv = vW[i])
        {
            float* pdJ = dJdw;
            int w = layer.Weights.Length;
            for (int x = 0; x < w; x++)
            {
                float pdJi = pdJ[x];
                pm[x] = pm[x] * beta1 + (1 - beta1) * pdJi;
                pv[x] = pv[x] * beta2 + (1 - beta2) * pdJi * pdJi;
                pw[x] -= alphat * pm[x] / ((float)Math.Sqrt(pv[x]) + epsilon);
            }
        }

        // Biases
        fixed (float* pb = layer.Biases, pm = mB[i], pv = vB[i])
        {
            float* pdJ = dJdb;
            int w = layer.Biases.Length;
            for (int b = 0; b < w; b++)
            {
                float pdJi = pdJ[b];
                pm[b] = pm[b] * beta1 + (1 - beta1) * pdJi;
                pv[b] = pv[b] * beta2 + (1 - beta2) * pdJi * pdJi;
                pb[b] -= alphat * pm[b] / ((float)Math.Sqrt(pv[b]) + epsilon);
            }
        }
    }

    return Minimize;
}
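For reference, this is the standard Adam update with bias-corrected moment estimates, where the correction factors beta1^t and beta2^t are tracked per layer in beta1t/beta2t:

    m = beta1 * m + (1 - beta1) * g
    v = beta2 * v + (1 - beta2) * g^2
    alpha_t = eta * sqrt(1 - beta2^t) / (1 - beta1^t)
    w = w - alpha_t * m / (sqrt(v) + epsilon)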
public static WeightsUpdater AdaDelta([NotNull] AdaDeltaInfo info, [NotNull] SequentialNetwork network)
{
    // Initialize the AdaDelta parameters
    float rho = info.Rho, epsilon = info.Epsilon, l2 = info.L2;
    float[][]
        egSquaredW = new float[network.WeightedLayersIndexes.Length][],
        eDeltaxSquaredW = new float[network.WeightedLayersIndexes.Length][],
        egSquaredB = new float[network.WeightedLayersIndexes.Length][],
        eDeltaxSquaredB = new float[network.WeightedLayersIndexes.Length][];
    for (int i = 0; i < network.WeightedLayersIndexes.Length; i++)
    {
        WeightedLayerBase layer = network._Layers[network.WeightedLayersIndexes[i]].To<NetworkLayerBase, WeightedLayerBase>();
        egSquaredW[i] = new float[layer.Weights.Length];
        eDeltaxSquaredW[i] = new float[layer.Weights.Length];
        egSquaredB[i] = new float[layer.Biases.Length];
        eDeltaxSquaredB[i] = new float[layer.Biases.Length];
    }

    // AdaDelta update for weights and biases
    unsafe void Minimize(int i, in Tensor dJdw, in Tensor dJdb, int samples, WeightedLayerBase layer)
    {
        // Tweak the weights
        fixed (float* pw = layer.Weights, egSqrt = egSquaredW[i], eDSqrtx = eDeltaxSquaredW[i])
        {
            float* pdj = dJdw;
            int w = layer.Weights.Length;
            for (int x = 0; x < w; x++)
            {
                float gt = pdj[x];
                egSqrt[x] = rho * egSqrt[x] + (1 - rho) * gt * gt;
                float
                    rmsDx_1 = (float)Math.Sqrt(eDSqrtx[x] + epsilon),
                    rmsGt = (float)Math.Sqrt(egSqrt[x] + epsilon),
                    dx = -(rmsDx_1 / rmsGt) * gt;
                eDSqrtx[x] = rho * eDSqrtx[x] + (1 - rho) * dx * dx;
                pw[x] += dx - l2 * pw[x];
            }
        }

        // Tweak the biases of the i-th layer
        fixed (float* pb = layer.Biases, egSqrt = egSquaredB[i], eDSqrtb = eDeltaxSquaredB[i])
        {
            float* pdj = dJdb;
            int w = layer.Biases.Length;
            for (int b = 0; b < w; b++)
            {
                float gt = pdj[b];
                egSqrt[b] = rho * egSqrt[b] + (1 - rho) * gt * gt;
                float
                    rmsDx_1 = (float)Math.Sqrt(eDSqrtb[b] + epsilon),
                    rmsGt = (float)Math.Sqrt(egSqrt[b] + epsilon),
                    db = -(rmsDx_1 / rmsGt) * gt;
                eDSqrtb[b] = rho * eDSqrtb[b] + (1 - rho) * db * db;
                pb[b] += db - l2 * pb[b];
            }
        }
    }

    return Minimize;
}
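The corresponding AdaDelta update, which needs no explicit learning rate because the step size is the ratio of the two running RMS estimates (E[g^2] kept in egSquared*, E[dx^2] kept in eDeltaxSquared*):

    E[g^2]  = rho * E[g^2]  + (1 - rho) * g^2
    dx      = -(sqrt(E[dx^2] + epsilon) / sqrt(E[g^2] + epsilon)) * g
    E[dx^2] = rho * E[dx^2] + (1 - rho) * dx^2
    w       = w + dx - l2 * w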