/// <summary>
/// Initializes the fully connected layer: weights, bias, gradients,
/// output activations and the delta matrix used in back propagation.
/// </summary>
/// <param name="inputWidth">Width of the input</param>
/// <param name="inputHeight">Height of the input</param>
/// <param name="inputDepth">Depth of the input</param>
/// <param name="batchSize">Size of the mini-batch</param>
/// <param name="initialization">Weight initialization scheme</param>
/// <param name="random">Random number generator used for the weight distribution</param>
public void Initialize(int inputWidth, int inputHeight, int inputDepth, int batchSize,
    Initialization initialization, Random random)
{
    var fans = WeightInitialization.GetFans(this, inputWidth, inputHeight, inputDepth);
    var distribution = WeightInitialization.GetWeightDistribution(initialization, fans, random);

    Weights = Matrix<float>.Build.Random(fans.FanIn, fans.FanOut, distribution);
    Bias = Vector<float>.Build.Dense(fans.FanOut, 0.0f);

    WeightsGradients = Matrix<float>.Build.Dense(fans.FanIn, fans.FanOut);
    BiasGradients = Vector<float>.Build.Dense(fans.FanOut);

    OutputActivations = Matrix<float>.Build.Dense(batchSize, fans.FanOut);
    m_delta = Matrix<float>.Build.Dense(batchSize, fans.FanIn);
}
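// Usage sketch (illustrative only; the layer type name, the Initialization member
// and the input dimensions below are assumptions, not taken from this file):
//
//   var dense = new DenseLayer(100);                      //hypothetical layer with 100 units
//   dense.Initialize(inputWidth: 28, inputHeight: 28, inputDepth: 1,
//       batchSize: 64, Initialization.GlorotUniform, new Random(42));
//   //Weights is now [fanIn x fanOut] = [784 x 100], sampled from the chosen distribution.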
/// <summary>
/// Initializes the convolutional layer: filter weights, bias, gradients,
/// im2col buffers and output activations.
/// </summary>
/// <param name="inputWidth">Width of the input</param>
/// <param name="inputHeight">Height of the input</param>
/// <param name="inputDepth">Depth of the input</param>
/// <param name="batchSize">Size of the mini-batch</param>
/// <param name="initialization">Weight initialization scheme</param>
/// <param name="random">Random number generator used for the weight distribution</param>
public void Initialize(int inputWidth, int inputHeight, int inputDepth, int batchSize,
    Initialization initialization, Random random)
{
    InputHeight = inputHeight;
    InputWidth = inputWidth;
    InputDepth = inputDepth;

    var filterGridWidth = ConvUtils.GetFilterGridLength(InputWidth, FilterWidth,
        m_stride, m_padWidth, BorderMode);
    var filterGridHeight = ConvUtils.GetFilterGridLength(InputHeight, FilterHeight,
        m_stride, m_padHeight, BorderMode);

    // Calculation of dimensions based on:
    // Nvidia, cuDNN: Efficient Primitives for Deep Learning: https://arxiv.org/pdf/1410.0759.pdf
    var filterCubeSize = InputDepth * FilterWidth * FilterHeight;
    var filterGridSize = filterGridWidth * filterGridHeight;

    Height = filterGridHeight;
    Width = filterGridWidth;
    Depth = FilterCount;

    var fans = WeightInitialization.GetFans(this, InputWidth, InputHeight, inputDepth);
    var distribution = WeightInitialization.GetWeightDistribution(initialization, fans, random);

    Weights = Matrix<float>.Build.Random(FilterCount, filterCubeSize, distribution);
    WeightsGradients = Matrix<float>.Build.Dense(FilterCount, filterCubeSize);

    Bias = Vector<float>.Build.Dense(FilterCount, 0.0f);
    BiasGradients = Vector<float>.Build.Dense(FilterCount);

    Im2Cols = Matrix<float>.Build.Dense(filterCubeSize, filterGridSize * batchSize);
    Conv = Matrix<float>.Build.Dense(FilterCount, filterGridSize * batchSize);

    OutputActivations = Matrix<float>.Build.Dense(batchSize, FilterCount * filterGridSize);
    m_deltaInReshape = Matrix<float>.Build.Dense(FilterCount, filterGridSize * batchSize);

    var fanIn = inputWidth * inputHeight * inputDepth;
    m_delta = Matrix<float>.Build.Dense(batchSize, fanIn);
}
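// Minimal sketch of the filter grid calculation (assumption: ConvUtils.GetFilterGridLength
// implements the standard convolution output-size formula; this helper is illustrative,
// not the library's actual code):
static int FilterGridLength(int inputLength, int filterLength, int stride, int padding)
{
    //Standard formula: floor((n + 2p - f) / s) + 1
    return (inputLength + 2 * padding - filterLength) / stride + 1;
}
// Example: a 28-wide input, 5-wide filter, stride 1 and padding 2 give
// (28 + 4 - 5) / 1 + 1 = 28 filter positions per row.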
public OutputLayer(double[,] InputDims, int ThisLayersNeurons, int _MiniBatchSize,
    ActivationFunction _ActivationFunction, WeightInitialization _WeightInitialization,
    TrainAlgorithm _TrainAlgorithm, double _LearnRate, double _L2_Regularization, double _gradient)
    : base(InputDims, ThisLayersNeurons, _MiniBatchSize, _ActivationFunction, _WeightInitialization,
        _TrainAlgorithm, _LearnRate, _L2_Regularization, _gradient)
{
}
public Layer(double[,] InputDims_ZofPreviousLayer, int ThisLayersNeurons, int _MiniBatchSize,
    ActivationFunction _ActivationFunction, WeightInitialization _WeightInitialization,
    TrainAlgorithm _TrainAlgorithm, double _LearnRate, double _L2_Regularization,
    double _gradient) //Constructor for HiddenLayer and OutputLayer
{
    TrainAlgorithm = _TrainAlgorithm;
    MiniBatchSize = _MiniBatchSize;
    gradient = _gradient;
    L2_Regularization = _L2_Regularization;
    LearnRate = _LearnRate;
    ActivationFunction = _ActivationFunction;

    //S is the matrix that holds the inputs to this layer
    S = new double[InputDims_ZofPreviousLayer.GetLength(0), ThisLayersNeurons]; //[Rows = BatchSize, Columns = ThisLayersNeurons]

    //W is the ingoing weight matrix for this layer
    W = new double[InputDims_ZofPreviousLayer.GetLength(1), ThisLayersNeurons]; //[Rows = PreviousLayersNeurons + Bias, Columns = ThisLayersNeurons]

    if (_WeightInitialization == WeightInitialization.NormalizedGaussianRandom)
    {
        double mean = 0;
        double stdDev = 1;
        for (int i = 0; i < W.GetLength(0); i++)
        {
            for (int j = 0; j < W.GetLength(1); j++)
            {
                //Box-Muller transform: two uniform(0,1] samples give one standard normal sample
                double u1 = 1.0 - rnd.NextDouble(); //1.0 - NextDouble() avoids Log(0)
                double u2 = 1.0 - rnd.NextDouble();
                double randStdNormal = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2); //random normal(0,1)
                W[i, j] = mean + stdDev * randStdNormal; //random normal(mean, stdDev^2)
            }
        }
    }
    else if (_WeightInitialization == WeightInitialization.Random)
    {
        for (int i = 0; i < W.GetLength(0); i++)
        {
            for (int j = 0; j < W.GetLength(1); j++)
            {
                W[i, j] = rnd.NextDouble() * 2 - 1; //uniform in [-1, 1)
            }
        }
    }

    //Save the previous gradient for the adaptive learn rate;
    //initialize dW_t_minus_1 as a zero matrix
    dW_t_minus_1 = new double[W.GetLength(0), W.GetLength(1)];
    for (int i = 0; i < dW_t_minus_1.GetLength(0); i++)
    {
        for (int j = 0; j < dW_t_minus_1.GetLength(1); j++)
        {
            dW_t_minus_1[i, j] = 0;
        }
    }

    //Initialize AdaptiveLearnRateGain as a ones matrix
    AdaptiveLearnRateGain = new double[W.GetLength(0), W.GetLength(1)];
    for (int i = 0; i < AdaptiveLearnRateGain.GetLength(0); i++)
    {
        for (int j = 0; j < AdaptiveLearnRateGain.GetLength(1); j++)
        {
            AdaptiveLearnRateGain[i, j] = 1;
        }
    }

    if (TrainAlgorithm == TrainAlgorithm.Adam)
    {
        //Initialize the Adam velocity and momentum as zero matrices
        Velocity_Adam = new double[W.GetLength(0), W.GetLength(1)];
        Momentum_Adam = new double[W.GetLength(0), W.GetLength(1)];
    }

    //Z is the matrix that holds the output values
    if (GetType() == typeof(HiddenLayer))
    {
        Z = new double[S.GetLength(0), S.GetLength(1) + Bias]; //[Rows = BatchSize, Columns = ThisLayersNeurons + Bias]
        //Set the bias column (= last column) of Z to 1
        for (int i = 0; i < Z.GetLength(0); i++) //Z.GetLength(0) = BatchSize
        {
            Z[i, Z.GetLength(1) - Bias] = 1;
        }
    }
    else //if OutputLayer, there is no bias on Z
    {
        this.Z = new double[this.S.GetLength(0), this.S.GetLength(1)];
    }

    //D is the matrix that holds the deltas for this layer
    this.D = new double[this.S.GetLength(1), this.S.GetLength(0)]; //[Rows = ThisLayersNeurons, Columns = BatchSize]

    //dF is the matrix that holds the derivatives of the activation function; the bias has no derivative
    this.dF = new double[this.S.GetLength(1), this.S.GetLength(0)]; //[Rows = ThisLayersNeurons, Columns = BatchSize]
}
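// The Gaussian initialization above uses the Box-Muller transform. A minimal
// stand-alone sketch of the same sampling step (hypothetical helper, not part
// of the original source):
static double NextGaussian(Random rnd, double mean = 0.0, double stdDev = 1.0)
{
    double u1 = 1.0 - rnd.NextDouble(); //uniform in (0, 1]; avoids Math.Log(0)
    double u2 = 1.0 - rnd.NextDouble();
    double standardNormal = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
    return mean + stdDev * standardNormal; //normal(mean, stdDev^2)
}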
public Layer(double[,,] InputDims_ZofPreviousLayer, int _FilterSize, int _Stride, int _Depth,
    int _MiniBatchSize, ActivationFunction _ActivationFunction, WeightInitialization _WeightInitialization,
    TrainAlgorithm _TrainAlgorithm, double _LearnRate, double _L2_Regularization,
    double _gradient) //Constructor for ConvolutionalLayer
{
    TrainAlgorithm = _TrainAlgorithm;
    MiniBatchSize = _MiniBatchSize;
    gradient = _gradient;
    L2_Regularization = _L2_Regularization;
    Stride = _Stride;

    //InputDims_ZofPreviousLayer.GetLength(0) = Height(i-1)
    //InputDims_ZofPreviousLayer.GetLength(1) = Width(i-1)
    //InputDims_ZofPreviousLayer.GetLength(2) = Depth(i-1) * MiniBatchSize = DepthOfInputLayer * MiniBatchSize
    double dHeight = (InputDims_ZofPreviousLayer.GetLength(0) - 1.0) / Stride + 1.0; //when changing something here, also change it in ANNMAth.Im2Mat
    double dWidth = (InputDims_ZofPreviousLayer.GetLength(1) - 1.0) / Stride + 1.0; //when changing something here, also change it in ANNMAth.Im2Mat
    if (dHeight % 1 == 0 && dWidth % 1 == 0)
    {
        Height = (int)dHeight;
        Width = (int)dWidth;
    }
    else
    {
        throw new Exception("Input dimensions and stride do not fit: (InputSize - 1) / Stride must be a whole number");
    }

    Depth = _Depth;
    FilterSize = _FilterSize;
    if (FilterSize % 2 == 0) //the filter size must be an uneven (odd) number
    {
        throw new Exception("FilterSize must be an uneven (odd) number");
    }
    LearnRate = _LearnRate;
    ActivationFunction = _ActivationFunction;

    //PreInput is the matrix that holds the zero-padded, bias-added and rearranged input for the computation of S
    PreInput = new double[Height * Width * MiniBatchSize,
        FilterSize * FilterSize * InputDims_ZofPreviousLayer.GetLength(2) / MiniBatchSize + Bias];

    //S is the matrix that holds the inputs to this layer
    S = new double[Height * Width * MiniBatchSize, Depth];

    //W is the ingoing rotated weight matrix for this layer, including a bias for each kernel
    //(see page 11 in Theory: rotated weight matrix for the forward pass)
    W = new double[FilterSize * FilterSize * InputDims_ZofPreviousLayer.GetLength(2) / MiniBatchSize + Bias, Depth];

    if (_WeightInitialization == WeightInitialization.NormalizedGaussianRandom)
    {
        double mean = 0;
        double stdDev = 1;
        for (int i = 0; i < W.GetLength(0); i++)
        {
            for (int j = 0; j < W.GetLength(1); j++)
            {
                //Box-Muller transform: two uniform(0,1] samples give one standard normal sample
                double u1 = 1.0 - rnd.NextDouble(); //1.0 - NextDouble() avoids Log(0)
                double u2 = 1.0 - rnd.NextDouble();
                double randStdNormal = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2); //random normal(0,1)
                W[i, j] = mean + stdDev * randStdNormal; //random normal(mean, stdDev^2)
            }
        }
    }
    else if (_WeightInitialization == WeightInitialization.Random)
    {
        for (int i = 0; i < W.GetLength(0); i++)
        {
            for (int j = 0; j < W.GetLength(1); j++)
            {
                W[i, j] = rnd.NextDouble() * 2 - 1; //uniform in [-1, 1)
            }
        }
    }

    //Save the previous gradient for the adaptive learn rate;
    //initialize dW_t_minus_1 as a zero matrix
    dW_t_minus_1 = new double[W.GetLength(0), W.GetLength(1)];
    for (int i = 0; i < dW_t_minus_1.GetLength(0); i++)
    {
        for (int j = 0; j < dW_t_minus_1.GetLength(1); j++)
        {
            dW_t_minus_1[i, j] = 0;
        }
    }

    //Initialize AdaptiveLearnRateGain as a ones matrix
    AdaptiveLearnRateGain = new double[W.GetLength(0), W.GetLength(1)];
    for (int i = 0; i < AdaptiveLearnRateGain.GetLength(0); i++)
    {
        for (int j = 0; j < AdaptiveLearnRateGain.GetLength(1); j++)
        {
            AdaptiveLearnRateGain[i, j] = 1;
        }
    }

    if (TrainAlgorithm == TrainAlgorithm.Adam)
    {
        //Initialize the Adam velocity and momentum as zero matrices
        Velocity_Adam = new double[W.GetLength(0), W.GetLength(1)];
        Momentum_Adam = new double[W.GetLength(0), W.GetLength(1)];
    }

    //Z is the matrix that holds the output values
    Z = new double[MiniBatchSize, Height * Width * Depth + Bias]; //[Rows = MiniBatchSize, Columns = NeuronsPerChannel * NumberOfChannels + Bias]
    //Set the bias column (= last column) of Z to 1
    for (int i = 0; i < Z.GetLength(0); i++) //Z.GetLength(0) = BatchSize
    {
        Z[i, Z.GetLength(1) - Bias] = 1;
    }
    Z_3D = new double[Height, Width, Depth * MiniBatchSize];

    //D is the matrix that holds the deltas for this layer
    D = new double[Height * Width * MiniBatchSize, Depth];
    D_3D = new double[Height, Width, Depth * MiniBatchSize];

    //dF is the matrix that holds the derivatives of the activation function
    dF = new double[Height * Width * Depth, MiniBatchSize];
}
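// Worked example of the dimension check in the constructor above: with an input
// height of 28 and Stride = 2, dHeight = (28 - 1) / 2 + 1 = 14.5, which is not a
// whole number, so the constructor throws; with Stride = 3, dHeight = (28 - 1) / 3 + 1 = 10.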
public ConvolutionalLayer(double[,,] InputDims_ZofPreviousLayer, int FilterSize, int Stride, int Depth,
    int _MiniBatchSize, ActivationFunction _ActivationFunction, WeightInitialization _WeightInitialization,
    TrainAlgorithm _TrainAlgorithm, double _LearnRate, double _L2_Regularization, double _gradient)
    : base(InputDims_ZofPreviousLayer, FilterSize, Stride, Depth, _MiniBatchSize, _ActivationFunction,
        _WeightInitialization, _TrainAlgorithm, _LearnRate, _L2_Regularization, _gradient)
{
}
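// Usage sketch (illustrative only; the input tensor shape, the enum members and the
// hyperparameters below are assumptions, not taken from this file):
//
//   //Input: 28x28 images, 1 channel, mini-batch of 10 -> depth dimension is 1 * 10
//   var input = new double[28, 28, 1 * 10];
//   var conv = new ConvolutionalLayer(input, FilterSize: 3, Stride: 1, Depth: 8,
//       _MiniBatchSize: 10, ActivationFunction.ReLU, WeightInitialization.NormalizedGaussianRandom,
//       TrainAlgorithm.Adam, _LearnRate: 0.001, _L2_Regularization: 0.0, _gradient: 1.0);
//   //With stride 1 the layer keeps the 28x28 grid and produces 8 feature maps.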