Example #1
        public static INeuralNetwork TryLoad([NotNull] Stream stream, ExecutionModePreference preference)
        {
            try
            {
                using (GZipStream gzip = new GZipStream(stream, CompressionMode.Decompress))
                {
                    if (!gzip.TryRead(out NetworkType model))
                    {
                        return null;
                    }
                    switch (model)
                    {
                    case NetworkType.Sequential: return SequentialNetwork.Deserialize(gzip, preference);

                    case NetworkType.ComputationGraph: return ComputationGraphNetwork.Deserialize(gzip, preference);

                    default: return null;
                    }
                }
            }
            catch
            {
                // Locked or invalid file
                return null;
            }
        }
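For context, a minimal usage sketch of the loader above; the containing class name and the ExecutionModePreference member used here are assumptions for illustration, not something shown in the snippet:

    // Hypothetical caller: NetworkLoader and ExecutionModePreference.Cpu are assumed names
    using (FileStream stream = File.OpenRead("network.nnet"))
    {
        INeuralNetwork network = NetworkLoader.TryLoad(stream, ExecutionModePreference.Cpu);
        if (network == null)
        {
            // null means the stream was locked, truncated or not a serialized network
        }
    }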
Example #2
        public static WeightsUpdater RMSProp([NotNull] RMSPropInfo info, [NotNull] SequentialNetwork network)
        {
            // Setup
            float
                eta     = info.Eta,
                rho     = info.Rho,
                lambda  = info.Lambda,
                epsilon = info.Epsilon;

            float[][]
                mW = new float[network.WeightedLayersIndexes.Length][],
                mB = new float[network.WeightedLayersIndexes.Length][];
            for (int i = 0; i < network.WeightedLayersIndexes.Length; i++)
            {
                WeightedLayerBase layer = network._Layers[network.WeightedLayersIndexes[i]].To<NetworkLayerBase, WeightedLayerBase>();
                mW[i] = new float[layer.Weights.Length];
                mB[i] = new float[layer.Biases.Length];
            }

            // Closure
            unsafe void Minimize(int i, in Tensor dJdw, in Tensor dJdb, int samples, WeightedLayerBase layer)
            {
                // Tweak the weights
                float
                    alpha    = eta / samples,
                    l2Factor = eta * lambda / samples;

                fixed(float *pw = layer.Weights, pmw = mW[i])
                {
                    float *pdj = dJdw;
                    int    w   = layer.Weights.Length;

                    for (int x = 0; x < w; x++)
                    {
                        float pdJi = pdj[x];
                        pmw[x] = rho * pmw[x] + (1 - rho) * pdJi * pdJi;
                        pw[x] -= l2Factor * pw[x] + alpha * pdJi / ((float)Math.Sqrt(pmw[x]) + epsilon);
                    }
                }

                // Tweak the biases of the lth layer
                fixed(float *pb = layer.Biases, pmb = mB[i])
                {
                    float *pdj = dJdb;
                    int    w   = layer.Biases.Length;

                    for (int b = 0; b < w; b++)
                    {
                        float pdJi = pdj[b];
                        pmb[b] = rho * pmb[b] + (1 - rho) * pdJi * pdJi;
                        pb[b] -= alpha * pdJi / ((float)Math.Sqrt(pmb[b]) + epsilon);
                    }
                }
            }

            return Minimize;
        }
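In formula form, the per-parameter rule the closure above implements, with g a gradient entry, n the sample count and m the running average of squared gradients (mW/mB), is:

\[
m \leftarrow \rho\, m + (1 - \rho)\, g^{2},
\qquad
w \leftarrow w - \frac{\eta \lambda}{n}\, w - \frac{\eta}{n}\, \frac{g}{\sqrt{m} + \epsilon}
\]

The biases follow the same rule without the L2 term.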
Example #3
        private static TrainingSessionResult Optimize(
            SequentialNetwork network,
            BatchesCollection miniBatches,
            int epochs, float dropout,
            [NotNull] WeightsUpdater updater,
            [CanBeNull] IProgress<BatchProgress> batchProgress,
            [CanBeNull] IProgress<TrainingProgressEventArgs> trainingProgress,
            [CanBeNull] ValidationDataset validationDataset,
            [CanBeNull] TestDataset testDataset,
            CancellationToken token)
        {
            // Setup
            DateTime startTime = DateTime.Now;
            List<DatasetEvaluationResult>
                validationReports = new List<DatasetEvaluationResult>(),
                testReports       = new List<DatasetEvaluationResult>();

            TrainingSessionResult PrepareResult(TrainingStopReason reason, int loops)
            {
                return new TrainingSessionResult(reason, loops, DateTime.Now.Subtract(startTime).RoundToSeconds(), validationReports, testReports);
            }

            // Convergence manager for the validation dataset
            RelativeConvergence convergence = validationDataset == null
                ? null
                : new RelativeConvergence(validationDataset.Tolerance, validationDataset.EpochsInterval);

            // Optional batch monitor
            BatchProgressMonitor batchMonitor = batchProgress == null ? null : new BatchProgressMonitor(miniBatches.Count, batchProgress);

            // Create the training batches
            for (int i = 0; i < epochs; i++)
            {
                // Shuffle the training set
                miniBatches.CrossShuffle();

                // Gradient descent over the current batches
                for (int j = 0; j < miniBatches.BatchesCount; j++)
                {
                    if (token.IsCancellationRequested)
                    {
                        return PrepareResult(TrainingStopReason.TrainingCanceled, i);
                    }
                    network.Backpropagate(miniBatches.Batches[j], dropout, updater);
                    batchMonitor?.NotifyCompletedBatch(miniBatches.Batches[j].X.GetLength(0));
                }
                batchMonitor?.Reset();

                // Check for overflows
                if (!Parallel.For(0, network._Layers.Length, (j, state) =>
                {
                    if (network._Layers[j] is WeightedLayerBase layer && !layer.ValidateWeights())
                    {
                        state.Break();
                    }
                }).IsCompleted)
Example #4
        public static TrainingSessionResult TrainNetwork(
            [NotNull] SequentialNetwork network, [NotNull] BatchesCollection batches,
            int epochs, float dropout,
            [NotNull] ITrainingAlgorithmInfo algorithm,
            [CanBeNull] IProgress<BatchProgress> batchProgress,
            [CanBeNull] IProgress<TrainingProgressEventArgs> trainingProgress,
            [CanBeNull] ValidationDataset validationDataset,
            [CanBeNull] TestDataset testDataset,
            CancellationToken token)
        {
            SharedEventsService.TrainingStarting.Raise();
            WeightsUpdater optimizer;

            switch (algorithm)
            {
            /* =================
             * Optimization
             * =================
             * The right optimizer is selected here, and the closure captured for each of them also holds local temporary data, if needed.
             * That temporary data is managed memory, so the GC collects it automatically once training stops and there is no need for
             * an extra callback to clean up unmanaged resources. */
            case MomentumInfo momentum:
                optimizer = WeightsUpdaters.Momentum(momentum, network);
                break;

            case StochasticGradientDescentInfo sgd:
                optimizer = WeightsUpdaters.StochasticGradientDescent(sgd);
                break;

            case AdaGradInfo adagrad:
                optimizer = WeightsUpdaters.AdaGrad(adagrad, network);
                break;

            case AdaDeltaInfo adadelta:
                optimizer = WeightsUpdaters.AdaDelta(adadelta, network);
                break;

            case AdamInfo adam:
                optimizer = WeightsUpdaters.Adam(adam, network);
                break;

            case AdaMaxInfo adamax:
                optimizer = WeightsUpdaters.AdaMax(adamax, network);
                break;

            case RMSPropInfo rms:
                optimizer = WeightsUpdaters.RMSProp(rms, network);
                break;

            default:
                throw new ArgumentException("The input training algorithm type is not supported");
            }
            return Optimize(network, batches, epochs, dropout, optimizer, batchProgress, trainingProgress, validationDataset, testDataset, token);
        }
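The comment block in the switch highlights the design choice shared by all of these factories: each one returns a closure that captures its own managed buffers, so the GC reclaims them when training ends and no cleanup callback is needed. A stripped-down sketch of that pattern, with purely illustrative names:

    public delegate void Updater(int layerIndex, float[] gradient, float[] weights);

    public static class UpdaterFactory
    {
        public static Updater Momentum(float eta, float momentum, int[] parameterCounts)
        {
            // Per-layer velocity buffers, captured by the closure below and
            // collected by the GC once the returned delegate is no longer referenced
            float[][] velocity = new float[parameterCounts.Length][];
            for (int i = 0; i < parameterCounts.Length; i++)
                velocity[i] = new float[parameterCounts[i]];

            return (layerIndex, gradient, weights) =>
            {
                float[] v = velocity[layerIndex];
                for (int j = 0; j < gradient.Length; j++)
                {
                    v[j] = momentum * v[j] + gradient[j];
                    weights[j] -= eta * v[j];
                }
            };
        }
    }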
Example #5
        public static WeightsUpdater Momentum([NotNull] MomentumInfo info, [NotNull] SequentialNetwork network)
        {
            // Setup
            float
                eta      = info.Eta,
                lambda   = info.Lambda,
                momentum = info.Momentum;

            float[][]
                mW = new float[network.WeightedLayersIndexes.Length][],
                mB = new float[network.WeightedLayersIndexes.Length][];
            for (int i = 0; i < network.WeightedLayersIndexes.Length; i++)
            {
                WeightedLayerBase layer = network._Layers[network.WeightedLayersIndexes[i]].To<NetworkLayerBase, WeightedLayerBase>();
                mW[i] = new float[layer.Weights.Length];
                mB[i] = new float[layer.Biases.Length];
            }

            // Closure
            unsafe void Minimize(int i, in Tensor dJdw, in Tensor dJdb, int samples, WeightedLayerBase layer)
            {
                // Tweak the weights
                float
                    alpha    = eta / samples,
                    l2Factor = eta * lambda / samples;

                fixed(float *pw = layer.Weights, pmw = mW[i])
                {
                    float *pdj = dJdw;
                    int    w   = layer.Weights.Length;

                    for (int x = 0; x < w; x++)
                    {
                        pmw[x] = momentum * pmw[x] + pdj[x];
                        pw[x] -= l2Factor * pw[x] + alpha * pmw[x];
                    }
                }

                // Tweak the biases of the lth layer
                fixed(float *pb = layer.Biases, pmb = mB[i])
                {
                    float *pdj = dJdb;
                    int    w   = layer.Biases.Length;

                    for (int b = 0; b < w; b++)
                    {
                        pmb[b] = momentum * pmb[b] + pdj[b];
                        pb[b] -= alpha * pmb[b]; // apply the accumulated velocity (no L2 penalty on biases)
                    }
                }
            }

            return Minimize;
        }
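In formula form, with v the velocity buffer (mW/mB), g a gradient entry and n the sample count, the update above is:

\[
v \leftarrow \mu\, v + g,
\qquad
w \leftarrow w - \frac{\eta \lambda}{n}\, w - \frac{\eta}{n}\, v
\]

with the L2 term omitted for the biases.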
Example #6
        public static WeightsUpdater AdaMax([NotNull] AdaMaxInfo info, [NotNull] SequentialNetwork network)
        {
            // Initialize AdaMax parameters
            float
                eta   = info.Eta,
                beta1 = info.Beta1,
                beta2 = info.Beta2;

            float[][]
                mW = new float[network.WeightedLayersIndexes.Length][],
                uW = new float[network.WeightedLayersIndexes.Length][],
                mB = new float[network.WeightedLayersIndexes.Length][],
                uB = new float[network.WeightedLayersIndexes.Length][];
            float[] beta1t = new float[network.WeightedLayersIndexes.Length];
            for (int i = 0; i < network.WeightedLayersIndexes.Length; i++)
            {
                WeightedLayerBase layer = network._Layers[network.WeightedLayersIndexes[i]].To<NetworkLayerBase, WeightedLayerBase>();
                mW[i]     = new float[layer.Weights.Length];
                uW[i]     = new float[layer.Weights.Length];
                mB[i]     = new float[layer.Biases.Length];
                uB[i]     = new float[layer.Biases.Length];
                beta1t[i] = beta1;
            }

            // AdaMax update for weights and biases
            unsafe void Minimize(int i, in Tensor dJdw, in Tensor dJdb, int samples, WeightedLayerBase layer)
            {
                // Alpha at timestep t
                float b1t = beta1t[i];

                beta1t[i] *= beta1;

                // Weights
                fixed(float *pw = layer.Weights, pm = mW[i], pu = uW[i])
                {
                    float *pdJ = dJdw;
                    int    w   = layer.Weights.Length;

                    for (int x = 0; x < w; x++)
                    {
                        float pdJi = pdJ[x];
                        pm[x]  = beta1 * pm[x] + (1 - beta1) * pdJi;
                        pu[x]  = (beta2 * pu[x]).Max(pdJi.Abs());
                        pw[x] -= eta / (1 - b1t) * pm[x] / pu[x];
                    }
                }

                // Biases
                fixed(float *pb = layer.Biases, pm = mB[i], pu = uB[i])
                {
                    float *pdJ = dJdb;
                    int    w   = layer.Biases.Length;

                    for (int b = 0; b < w; b++)
                    {
                        float pdJi = pdJ[b];
                        pm[b]  = beta1 * pm[b] + (1 - beta1) * pdJi;
                        pu[b]  = (beta2 * pu[b]).Max(pdJi.Abs());
                        pb[b] -= eta / (1 - b1t) * pm[b] / pu[b];
                    }
                }
            }

            return Minimize;
        }
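The closure implements the AdaMax rule, with m the first-moment estimate (mW/mB), u the infinity-norm accumulator (uW/uB) and the decayed factor stored in beta1t[i]:

\[
m \leftarrow \beta_1\, m + (1 - \beta_1)\, g,
\qquad
u \leftarrow \max(\beta_2\, u,\ |g|),
\qquad
w \leftarrow w - \frac{\eta}{1 - \beta_1^{t}}\, \frac{m}{u}
\]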

Example #7
        public static WeightsUpdater Adam([NotNull] AdamInfo info, [NotNull] SequentialNetwork network)
        {
            // Initialize Adam parameters
            float
                eta     = info.Eta,
                beta1   = info.Beta1,
                beta2   = info.Beta2,
                epsilon = info.Epsilon;

            float[][]
                mW = new float[network.WeightedLayersIndexes.Length][],
                vW = new float[network.WeightedLayersIndexes.Length][],
                mB = new float[network.WeightedLayersIndexes.Length][],
                vB = new float[network.WeightedLayersIndexes.Length][];
            float[]
                beta1t = new float[network.WeightedLayersIndexes.Length],
                beta2t = new float[network.WeightedLayersIndexes.Length];
            for (int i = 0; i < network.WeightedLayersIndexes.Length; i++)
            {
                WeightedLayerBase layer = network._Layers[network.WeightedLayersIndexes[i]].To<NetworkLayerBase, WeightedLayerBase>();
                mW[i]     = new float[layer.Weights.Length];
                vW[i]     = new float[layer.Weights.Length];
                mB[i]     = new float[layer.Biases.Length];
                vB[i]     = new float[layer.Biases.Length];
                beta1t[i] = beta1;
                beta2t[i] = beta2;
            }

            // Adam update for weights and biases
            unsafe void Minimize(int i, in Tensor dJdw, in Tensor dJdb, int samples, WeightedLayerBase layer)
            {
                // Alpha at timestep t
                float alphat = eta * (float)Math.Sqrt(1 - beta2t[i]) / (1 - beta1t[i]);

                beta1t[i] *= beta1;
                beta2t[i] *= beta2;

                // Weights
                fixed(float *pw = layer.Weights, pm = mW[i], pv = vW[i])
                {
                    float *pdJ = dJdw;
                    int    w   = layer.Weights.Length;

                    for (int x = 0; x < w; x++)
                    {
                        float pdJi = pdJ[x];
                        pm[x]  = pm[x] * beta1 + (1 - beta1) * pdJi;
                        pv[x]  = pv[x] * beta2 + (1 - beta2) * pdJi * pdJi;
                        pw[x] -= alphat * pm[x] / ((float)Math.Sqrt(pv[x]) + epsilon);
                    }
                }

                // Biases
                fixed(float *pb = layer.Biases, pm = mB[i], pv = vB[i])
                {
                    float *pdJ = dJdb;
                    int    w   = layer.Biases.Length;

                    for (int b = 0; b < w; b++)
                    {
                        float pdJi = pdJ[b];
                        pm[b]  = pm[b] * beta1 + (1 - beta1) * pdJi;
                        pv[b]  = pv[b] * beta2 + (1 - beta2) * pdJi * pdJi;
                        pb[b] -= alphat * pm[b] / ((float)Math.Sqrt(pv[b]) + epsilon);
                    }
                }
            }

            return Minimize;
        }
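In formula form, with m and v the first- and second-moment estimates and the decayed factors stored in beta1t[i] and beta2t[i], the closure applies the usual Adam rule with the bias correction folded into the step size:

\[
m \leftarrow \beta_1\, m + (1 - \beta_1)\, g,
\qquad
v \leftarrow \beta_2\, v + (1 - \beta_2)\, g^{2},
\qquad
\alpha_t = \eta\, \frac{\sqrt{1 - \beta_2^{t}}}{1 - \beta_1^{t}},
\qquad
w \leftarrow w - \alpha_t\, \frac{m}{\sqrt{v} + \epsilon}
\]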
Example #8
        public static WeightsUpdater AdaDelta([NotNull] AdaDeltaInfo info, [NotNull] SequentialNetwork network)
        {
            // Initialize AdaDelta parameters
            float
                rho     = info.Rho,
                epsilon = info.Epsilon,
                l2      = info.L2;

            float[][]
                egSquaredW      = new float[network.WeightedLayersIndexes.Length][],
                eDeltaxSquaredW = new float[network.WeightedLayersIndexes.Length][],
                egSquaredB      = new float[network.WeightedLayersIndexes.Length][],
                eDeltaxSquaredB = new float[network.WeightedLayersIndexes.Length][];
            for (int i = 0; i < network.WeightedLayersIndexes.Length; i++)
            {
                WeightedLayerBase layer = network._Layers[network.WeightedLayersIndexes[i]].To<NetworkLayerBase, WeightedLayerBase>();
                egSquaredW[i]      = new float[layer.Weights.Length];
                eDeltaxSquaredW[i] = new float[layer.Weights.Length];
                egSquaredB[i]      = new float[layer.Biases.Length];
                eDeltaxSquaredB[i] = new float[layer.Biases.Length];
            }

            // AdaDelta update for weights and biases
            unsafe void Minimize(int i, in Tensor dJdw, in Tensor dJdb, int samples, WeightedLayerBase layer)
            {
                fixed(float *pw = layer.Weights, egSqrt = egSquaredW[i], eDSqrtx = eDeltaxSquaredW[i])
                {
                    float *pdj = dJdw;
                    int    w   = layer.Weights.Length;

                    for (int x = 0; x < w; x++)
                    {
                        float gt = pdj[x];
                        egSqrt[x] = rho * egSqrt[x] + (1 - rho) * gt * gt;
                        float
                            rmsDx_1 = (float)Math.Sqrt(eDSqrtx[x] + epsilon),
                            rmsGt   = (float)Math.Sqrt(egSqrt[x] + epsilon),
                            dx      = -(rmsDx_1 / rmsGt) * gt;
                        eDSqrtx[x] = rho * eDSqrtx[x] + (1 - rho) * dx * dx;
                        pw[x]     += dx - l2 * pw[x];
                    }
                }

                // Tweak the biases of the lth layer
                fixed(float *pb = layer.Biases, egSqrt = egSquaredB[i], eDSqrtb = eDeltaxSquaredB[i])
                {
                    float *pdj = dJdb;
                    int    w   = layer.Biases.Length;

                    for (int b = 0; b < w; b++)
                    {
                        float gt = pdj[b];
                        egSqrt[b] = rho * egSqrt[b] + (1 - rho) * gt * gt;
                        float
                            rmsDx_1 = (float)Math.Sqrt(eDSqrtb[b] + epsilon),
                            rmsGt   = (float)Math.Sqrt(egSqrt[b] + epsilon),
                            db      = -(rmsDx_1 / rmsGt) * gt;
                        eDSqrtb[b] = rho * eDSqrtb[b] + (1 - rho) * db * db;
                        pb[b]     += db - l2 * pb[b];
                    }
                }
            }

            return Minimize;
        }
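For reference, the per-parameter AdaDelta rule implemented above, with E[g^2] the egSquared accumulator, E[Δx^2] the eDeltaxSquared accumulator and λ2 the info.L2 coefficient (applied to weights and biases alike):

\[
E[g^{2}] \leftarrow \rho\, E[g^{2}] + (1 - \rho)\, g^{2},
\qquad
\Delta x = -\frac{\sqrt{E[\Delta x^{2}] + \epsilon}}{\sqrt{E[g^{2}] + \epsilon}}\, g,
\]
\[
E[\Delta x^{2}] \leftarrow \rho\, E[\Delta x^{2}] + (1 - \rho)\, \Delta x^{2},
\qquad
w \leftarrow w + \Delta x - \lambda_{2}\, w
\]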