Example #1
 public void ApplyGradients(NeuralNetworkOptions opts)
 {
     for (var layer = 0; layer < Weights.GetLength(0); layer++)
     {
         ManagedMatrix.Add(Weights[layer], Deltas[layer], -opts.Alpha);
     }
 }
 public void ApplyGradients(NeuralNetworkOptions opts)
 {
     // dWji = learning_rate * dWji
     // dWkj = learning_rate * dWkj
     // w_ji = w_ji - dWji
     // w_kj = w_kj - dWkj
     ManagedMatrix.Add(Wkj, DeltaWkj, -opts.Alpha);
     ManagedMatrix.Add(Wji, DeltaWji, -opts.Alpha);
 }
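Both overloads implement the plain gradient-descent update w = w - Alpha * dW by passing the negative learning rate as the scale factor of ManagedMatrix.Add. A minimal training-loop sketch, assuming a surrounding network object that exposes Forward, BackPropagation, and ApplyGradients as in the later examples (the network variable and the epochs count are illustrative, not part of the library):

 // illustrative driver loop; "network" and "epochs" are assumptions
 for (var epoch = 0; epoch < epochs; epoch++)
 {
     network.Forward(trainingSet);          // forward pass: compute activations
     network.BackPropagation(trainingSet);  // compute the weight deltas
     network.ApplyGradients(opts);          // w = w - opts.Alpha * dW
 }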
Example #3
        // Backward propagation
        public void BackPropagation(ManagedArray input)
        {
            var last = Weights.GetLength(0) - 1;

            D[0] = ManagedMatrix.Diff(Y, Y_true);

            var current = 1;

            for (var layer = last - 1; layer >= 0; layer--)
            {
                var prev = current - 1;

                var W  = new ManagedArray(Weights[layer + 1].x - 1, Weights[layer + 1].y, false);
                var DZ = ManagedMatrix.DSigm(Z[layer]);

                D[current] = new ManagedArray(W.x, D[prev].y, false);

                ManagedOps.Copy2D(W, Weights[layer + 1], 1, 0);
                ManagedMatrix.Multiply(D[current], D[prev], W);
                ManagedMatrix.Product(D[current], DZ);

                ManagedOps.Free(W, DZ);

                current++;
            }

            for (var layer = 0; layer < Weights.GetLength(0); layer++)
            {
                var tD = ManagedMatrix.Transpose(D[Weights.GetLength(0) - layer - 1]);

                Deltas[layer] = new ManagedArray(Weights[layer].x, Weights[layer].y, false);

                ManagedMatrix.Multiply(Deltas[layer], tD, X[layer]);
                ManagedMatrix.Multiply(Deltas[layer], 1.0 / input.y);

                ManagedOps.Free(tD);
            }

            Cost = 0.0;
            L2   = 0.0;

            for (var i = 0; i < Y_true.Length(); i++)
            {
                L2   += 0.5 * (D[0][i] * D[0][i]);
                Cost += (-Y_true[i] * Math.Log(Y[i]) - (1 - Y_true[i]) * Math.Log(1 - Y[i]));
            }

            Cost /= input.y;
            L2   /= input.y;

            // Cleanup
            for (var layer = 0; layer < Weights.GetLength(0); layer++)
            {
                ManagedOps.Free(D[layer], X[layer], Z[layer]);
            }
        }
        public void ApplyGradients(NeuralNetworkOptions opts)
        {
            // dWji = learning_rate * dWji
            // dWkj = learning_rate * dWkj
            // w_ji = w_ji - dWji
            // w_kj = w_kj - dWkj
            ManagedMatrix.Add(Wkj, DeltaWkj, -opts.Alpha);
            ManagedMatrix.Add(Wji, DeltaWji, -opts.Alpha);

            // cleanup of arrays allocated in BackPropagation
            ManagedOps.Free(DeltaWji, DeltaWkj);
        }
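The loop above is the standard delta recursion for a sigmoid network, restated here in the R-style notation of the comments, with m = input.y the batch size and the bias column of the next layer's weights dropped before the product (the Copy2D call with column offset 1):

        // d_L  = y - y_true
        // d_l  = (d_{l+1} %*% w_{l+1}) * nnet_dsigmoid(z_l)
        // dW_l = (t(d_l) %*% x_l) / m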
Example #5
        void ApplyGradients(ConvolutionalNeuralNetworkOptions opts)
        {
            for (var l = 1; l < Layers.Count; l++)
            {
                if (Layers[l].Type == LayerTypes.Convolution)
                {
                    ManagedMatrix.Add(Layers[l].FeatureMap, Layers[l].DeltaFeatureMap, -opts.Alpha);
                    ManagedMatrix.Add(Layers[l].Bias, Layers[l].DeltaBias, -opts.Alpha);
                }
            }

            ManagedMatrix.Add(Weights, WeightsDelta, -opts.Alpha);
            ManagedMatrix.Add(Bias, BiasDelta, -opts.Alpha);
        }
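
        // The convolutional variant applies the same update rule layer by layer:
        // each convolution layer's kernels and biases are stepped in place,
        // followed by the fully connected output weights and bias:
        //
        //     K_l = K_l - Alpha * dK_l    (per convolution layer)
        //     b_l = b_l - Alpha * db_l
        //     W   = W   - Alpha * dW      (fully connected output layer)
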
        // Forward Propagation
        public void Forward(ManagedArray training)
        {
            // add bias column to input layer
            var InputBias = new ManagedArray(1, training.y);

            ManagedOps.Set(InputBias, 1.0);

            // x = cbind(array(1, c(nrow(training_set), 1)), training_set)
            var x = ManagedMatrix.CBind(InputBias, training);

            // compute hidden layer activation

            // z_2 = x %*% t(w_ji)
            var tWji = new ManagedArray(Wji.y, Wji.x);

            ManagedMatrix.Transpose(tWji, Wji);

            Z2 = new ManagedArray(tWji.x, x.y);
            ManagedMatrix.Multiply(Z2, x, tWji);

            // z_j = nnet_sigmoid(z_2)
            var Zj = ManagedMatrix.Sigm(Z2);

            // add bias column to hidden layer output
            var HiddenBias = new ManagedArray(1, Zj.y);

            ManagedOps.Set(HiddenBias, 1.0);

            // a_2 = cbind(array(1, c(nrow(z_j), 1)), z_j)
            A2 = ManagedMatrix.CBind(HiddenBias, Zj);

            // compute output layer

            var tWkj = new ManagedArray(Wkj.y, Wkj.x);

            ManagedMatrix.Transpose(tWkj, Wkj);

            //  y_k = nnet_sigmoid(a_2 %*% t(w_kj))
            var A2Wkj = new ManagedArray(tWkj.x, A2.y);

            ManagedMatrix.Multiply(A2Wkj, A2, tWkj);

            Yk = ManagedMatrix.Sigm(A2Wkj);

            // cleanup
            ManagedOps.Free(A2Wkj, HiddenBias, InputBias);
            ManagedOps.Free(tWkj, tWji, x, Zj);
        }
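Forward realizes the two-layer pass spelled out in the R comments, leaving Z2, A2, and Yk allocated for BackPropagation and freeing only the temporaries. A minimal call sketch, assuming the ManagedArray(x, y) constructor takes (columns, rows) as the CBind and Multiply usage implies, and that the network and its weights were initialized elsewhere:

        var training = new ManagedArray(2, 4);  // 4 samples, 2 features (assumed layout)
        // ... fill training[x, y] with input values ...
        network.Forward(training);              // populates Z2, A2, and Yk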
Example #7
        // Forward Propagation
        public void Forward(ManagedArray input)
        {
            // create bias column
            var InputBias = new ManagedArray(1, input.y, false);

            ManagedOps.Set(InputBias, 1.0);

            // Compute input activations
            var last = Weights.GetLength(0) - 1;

            for (var layer = 0; layer < Weights.GetLength(0); layer++)
            {
                var XX = layer == 0 ? ManagedMatrix.CBind(InputBias, input) : ManagedMatrix.CBind(InputBias, Activations[layer - 1]);
                var tW = ManagedMatrix.Transpose(Weights[layer]);
                var ZZ = ManagedMatrix.Multiply(XX, tW);

                X[layer] = XX;
                Z[layer] = ZZ;

                if (layer != last)
                {
                    var SS = ManagedMatrix.Sigm(ZZ);

                    Activations[layer] = SS;
                }
                else
                {
                    ManagedOps.Free(Y);

                    Y = ManagedMatrix.Sigm(ZZ);
                }

                ManagedOps.Free(tW);
            }

            // Cleanup
            for (var layer = 0; layer < Activations.GetLength(0); layer++)
            {
                ManagedOps.Free(Activations[layer]);
            }

            ManagedOps.Free(InputBias);
        }
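
        // This generalizes the two-layer pass of Example #5 to an arbitrary
        // number of layers: each layer prepends a bias column to the previous
        // activation, multiplies by the transposed weights, and applies the
        // sigmoid, with the final layer writing into Y instead of Activations:
        //
        //     x_l = cbind(1, a_{l-1})    with a_0 = input
        //     z_l = x_l %*% t(w_l)
        //     a_l = nnet_sigmoid(z_l),   y = a_last
        //
        // X[layer] and Z[layer] are retained for BackPropagation.
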
        ManagedArray Labels(ManagedArray output, NeuralNetworkOptions opts)
        {
            var result     = new ManagedArray(opts.Categories, opts.Items);
            var eye_matrix = ManagedMatrix.Diag(opts.Categories);

            for (var y = 0; y < opts.Items; y++)
            {
                if (opts.Categories > 1)
                {
                    for (var x = 0; x < opts.Categories; x++)
                    {
                        result[x, y] = eye_matrix[x, (int)output[y] - 1];
                    }
                }
                else
                {
                    result[y] = output[y];
                }
            }

            ManagedOps.Free(eye_matrix);

            return result;
        }
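
        // Labels one-hot encodes 1-based integer class labels by copying
        // entries of an identity matrix: with opts.Categories = 3, an output
        // value of 2 becomes the vector (0, 1, 0), while a single category
        // passes the raw value through. For example (illustrative values):
        //
        //     output = (1, 2, 3), opts.Categories = 3, opts.Items = 3
        //     Labels(output, opts) -> (1,0,0), (0,1,0), (0,0,1)
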
        // Backward propagation
        public void BackPropagation(ManagedArray training)
        {
            // add bias column to input layer
            var InputBias = new ManagedArray(1, training.y);

            ManagedOps.Set(InputBias, 1.0);

            // x = cbind(array(1, c(nrow(training_set), 1)), training_set)
            var x = ManagedMatrix.CBind(InputBias, training);

            // compute intermediate delta values per layer

            // d3 = y_k - y_matrix
            var D3 = ManagedMatrix.Diff(Yk, Y_output);

            //  d2 = d3 %*% w_kj[, 2:ncol(w_kj)] * nnet_dsigmoid(z_2)
            var sWkj = new ManagedArray(Wkj.x - 1, Wkj.y);

            ManagedOps.Copy2D(sWkj, Wkj, 1, 0);

            var D2 = new ManagedArray(sWkj.x, D3.y);

            ManagedMatrix.Multiply(D2, D3, sWkj);

            var DZ2 = ManagedMatrix.DSigm(Z2);

            ManagedMatrix.Product(D2, DZ2);

            // dWji = (t(d2) %*% x)
            // dWkj = (t(d3) %*% a_2)
            var tD2 = new ManagedArray(D2.y, D2.x);
            var tD3 = new ManagedArray(D3.y, D3.x);

            ManagedMatrix.Transpose(tD2, D2);
            ManagedMatrix.Transpose(tD3, D3);

            DeltaWji = new ManagedArray(Wji.x, Wji.y);
            DeltaWkj = new ManagedArray(Wkj.x, Wkj.y);

            ManagedMatrix.Multiply(DeltaWji, tD2, x);
            ManagedMatrix.Multiply(DeltaWkj, tD3, A2);

            // cost = sum(-y_matrix * log(y_k) - (1 - y_matrix) * log(1 - y_k))
            Cost = 0.0;
            L2   = 0.0;

            for (var i = 0; i < Y_output.Length(); i++)
            {
                L2   += 0.5 * (D3[i] * D3[i]);
                Cost += (-Y_output[i] * Math.Log(Yk[i]) - (1 - Y_output[i]) * Math.Log(1 - Yk[i]));
            }

            // cost = cost / m
            // dWji = dWji / m
            // dWkj = dWkj / m
            Cost /= training.y;
            L2   /= training.y;

            ManagedMatrix.Multiply(DeltaWji, 1.0 / training.y);
            ManagedMatrix.Multiply(DeltaWkj, 1.0 / training.y);

            // cleanup
            ManagedOps.Free(D2, D3, DZ2, InputBias);
            ManagedOps.Free(sWkj, tD2, tD3, x);

            // cleanup of arrays allocated in Forward
            ManagedOps.Free(A2, Yk, Z2);
        }
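The accumulated terms are the batch-averaged binary cross-entropy cost and half the mean squared error, with m = training.y:

        // cost = sum(-y_matrix * log(y_k) - (1 - y_matrix) * log(1 - y_k)) / m
        // L2   = 0.5 * sum((y_k - y_matrix)^2) / m

Note that the final cleanup also frees A2, Yk, and Z2, so Forward must run again before the next BackPropagation call.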
Example #10
        // Update Network Weights based on computed errors
        public void BackPropagation(ManagedArray batch)
        {
            var n      = Layers.Count;
            var last   = n - 1;
            var batchz = Layers[last].Activation.z;

            // backprop deltas
            ManagedOps.Free(OutputDelta, OutputError);

            OutputDelta = new ManagedArray(Output, false);
            OutputError = new ManagedArray(Output, false);

            for (var x = 0; x < Output.Length(); x++)
            {
                // error
                OutputError[x] = Output[x] - batch[x];

                // output delta
                OutputDelta[x] = OutputError[x] * (Output[x] * (1 - Output[x]));
            }

            // Loss Function
            L = 0.5 * ManagedMatrix.SquareSum(OutputError) / batch.x;

            ManagedOps.Free(WeightsTransposed, FeatureVectorDelta);

            FeatureVectorDelta = new ManagedArray(FeatureVector, false);
            WeightsTransposed  = new ManagedArray(Weights, false);

            // feature vector delta
            ManagedMatrix.Transpose(WeightsTransposed, Weights);
            ManagedMatrix.Multiply(FeatureVectorDelta, WeightsTransposed, OutputDelta);

            // only convolution layers pass their output through the sigmoid function
            if (Layers[last].Type == LayerTypes.Convolution)
            {
                for (var x = 0; x < FeatureVectorDelta.Length(); x++)
                {
                    FeatureVectorDelta[x] = FeatureVectorDelta[x] * FeatureVector[x] * (1 - FeatureVector[x]);
                }
            }

            // reshape feature vector deltas into output map style
            var MapSize = Layers[last].Activation.x * Layers[last].Activation.y;
            var temp1D  = new ManagedArray(1, MapSize, false);
            var temp2D  = new ManagedArray(Layers[last].Activation.x, Layers[last].Activation.y, false);

            ManagedOps.Free(Layers[last].Delta);
            Layers[last].Delta = new ManagedArray(Layers[last].Activation, false);

            for (var j = 0; j < Layers[last].Activation.i; j++)
            {
                for (var ii = 0; ii < batchz; ii++)
                {
                    ManagedOps.Copy2D(temp1D, FeatureVectorDelta, ii, j * MapSize);
                    temp1D.Reshape(Layers[last].Activation.x, Layers[last].Activation.y);
                    ManagedMatrix.Transpose(temp2D, temp1D);
                    ManagedOps.Copy2D4D(Layers[last].Delta, temp2D, ii, j);
                    temp1D.Reshape(1, MapSize);
                }
            }

            ManagedOps.Free(temp1D, temp2D);

            for (var l = n - 2; l >= 0; l--)
            {
                var next = l + 1;

                if (Layers[l].Type == LayerTypes.Convolution)
                {
                    ManagedOps.Free(Layers[l].Delta);
                    Layers[l].Delta = new ManagedArray(Layers[l].Activation, false);

                    var xx = Layers[next].Scale * Layers[next].Activation.x;
                    var yy = Layers[next].Scale * Layers[next].Activation.y;

                    var FeatureMap         = new ManagedArray(Layers[next].Activation.x, Layers[next].Activation.y, false);
                    var FeatureMapExpanded = new ManagedArray(xx, yy, false);
                    var Activation         = new ManagedArray(xx, yy, false);
                    var Delta = new ManagedArray(xx, yy, false);

                    var Scale = (1.0 / (Layers[next].Scale * Layers[next].Scale));

                    for (var j = 0; j < Layers[l].Activation.i; j++)
                    {
                        for (var z = 0; z < batchz; z++)
                        {
                            ManagedOps.Copy4D2D(FeatureMap, Layers[next].Delta, z, j);
                            ManagedMatrix.Expand(FeatureMap, Layers[next].Scale, Layers[next].Scale, FeatureMapExpanded);
                            ManagedOps.Copy4D2D(Activation, Layers[l].Activation, z, j);

                            for (var x = 0; x < Delta.Length(); x++)
                            {
                                Delta[x] = Activation[x] * (1 - Activation[x]) * FeatureMapExpanded[x] * Scale;
                            }

                            ManagedOps.Copy2D4D(Layers[l].Delta, Delta, z, j);
                        }
                    }

                    ManagedOps.Free(FeatureMap, FeatureMapExpanded, Activation, Delta);
                }
                else if (Layers[l].Type == LayerTypes.Subsampling)
                {
                    ManagedOps.Free(Layers[l].Delta);
                    Layers[l].Delta = new ManagedArray(Layers[l].Activation, false);

                    var Delta      = new ManagedArray(Layers[next].Activation.x, Layers[next].Activation.y, batchz);
                    var FeatureMap = new ManagedArray(Layers[next].KernelSize, Layers[next].KernelSize, false);
                    var rot180     = new ManagedArray(Layers[next].KernelSize, Layers[next].KernelSize, false);
                    var z          = new ManagedArray(Layers[l].Activation.x, Layers[l].Activation.y, batchz);
                    var ztemp      = new ManagedArray(Layers[l].Activation.x, Layers[l].Activation.y, batchz, false);

                    for (var i = 0; i < Layers[l].Activation.i; i++)
                    {
                        ManagedOps.Set(z, 0.0);

                        for (var j = 0; j < Layers[next].Activation.i; j++)
                        {
                            ManagedOps.Copy4DIJ2D(FeatureMap, Layers[next].FeatureMap, i, j);
                            ManagedMatrix.Rotate180(rot180, FeatureMap);

                            ManagedOps.Copy4D3D(Delta, Layers[next].Delta, j);
                            ManagedConvolution.Full(Delta, rot180, ztemp);
                            ManagedMatrix.Add(z, ztemp);
                        }

                        ManagedOps.Copy3D4D(Layers[l].Delta, z, i);
                    }

                    ManagedOps.Free(Delta, FeatureMap, rot180, z, ztemp);
                }
            }

            // calc gradients
            for (var l = 1; l < n; l++)
            {
                var prev = l - 1;

                if (Layers[l].Type == LayerTypes.Convolution)
                {
                    ManagedOps.Free(Layers[l].DeltaFeatureMap, Layers[l].DeltaBias);

                    Layers[l].DeltaFeatureMap = new ManagedArray(Layers[l].FeatureMap, false);
                    Layers[l].DeltaBias       = new ManagedArray(Layers[l].OutputMaps, false);

                    var FeatureMapDelta = new ManagedArray(Layers[l].FeatureMap.x, Layers[l].FeatureMap.y, Layers[l].FeatureMap.z, false);

                    // d[j]
                    var dtemp = new ManagedArray(Layers[l].Activation.x, Layers[l].Activation.y, batchz, false);

                    // a[i] and flipped
                    var atemp = new ManagedArray(Layers[prev].Activation.x, Layers[prev].Activation.y, batchz, false);
                    var ftemp = new ManagedArray(Layers[prev].Activation.x, Layers[prev].Activation.y, batchz, false);

                    for (var j = 0; j < Layers[l].Activation.i; j++)
                    {
                        ManagedOps.Copy4D3D(dtemp, Layers[l].Delta, j);

                        for (var i = 0; i < Layers[prev].Activation.i; i++)
                        {
                            ManagedOps.Copy4D3D(atemp, Layers[prev].Activation, i);
                            ManagedMatrix.FlipAll(ftemp, atemp);
                            ManagedConvolution.Valid(ftemp, dtemp, FeatureMapDelta);
                            ManagedMatrix.Multiply(FeatureMapDelta, 1.0 / batchz);

                            ManagedOps.Copy2D4DIJ(Layers[l].DeltaFeatureMap, FeatureMapDelta, i, j);
                        }

                        Layers[l].DeltaBias[j] = ManagedMatrix.Sum(dtemp) / batchz;
                    }

                    ManagedOps.Free(FeatureMapDelta, dtemp, atemp, ftemp);
                }
            }

            var FeatureVectorTransposed = new ManagedArray(FeatureVector, false);

            ManagedMatrix.Transpose(FeatureVectorTransposed, FeatureVector);

            ManagedOps.Free(WeightsDelta, BiasDelta);

            WeightsDelta = new ManagedArray(Weights, false);
            BiasDelta    = new ManagedArray(Bias, false);

            ManagedMatrix.Multiply(WeightsDelta, OutputDelta, FeatureVectorTransposed);
            ManagedMatrix.Multiply(WeightsDelta, 1.0 / batchz);
            ManagedMatrix.Mean(BiasDelta, OutputDelta, 0);

            ManagedOps.Free(FeatureVectorTransposed);
        }
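The pass follows the classic convolutional backward recursion: output deltas use the sigmoid derivative, convolution layers receive the next layer's deltas expanded and scaled by 1 / scale^2, subsampling layers receive them through a full convolution with the 180-degree-rotated kernels, and the kernel and bias gradients are batch averages. In the comment notation used throughout:

        // delta_out = (output - target) * output * (1 - output)
        // L         = 0.5 * sum((output - target)^2) / batch.x
        // dK[i][j]  = valid_conv(flip(a[i]), d[j]) / batch.z
        // db[j]     = sum(d[j]) / batch.z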
Example #11
        // Compute Forward Transform on 3D Input
        public void FeedForward(ManagedArray batch, bool pool = false)
        {
            var n    = Layers.Count;
            var last = n - 1;

            var InputMaps = 1;

            ManagedOps.Free(Layers[0].Activation);
            Layers[0].Activation = new ManagedArray(batch, false);

            ManagedOps.Copy4D3D(Layers[0].Activation, batch, 0);

            for (var l = 1; l < n; l++)
            {
                var prev = l - 1;

                if (Layers[l].Type == LayerTypes.Convolution)
                {
                    var zx = Layers[prev].Activation.x - Layers[l].KernelSize + 1;
                    var zy = Layers[prev].Activation.y - Layers[l].KernelSize + 1;
                    var zz = batch.z;

                    ManagedOps.Free(Layers[l].Activation);
                    Layers[l].Activation = new ManagedArray(zx, zy, zz, Layers[l].OutputMaps, 1, false);

                    var Activation = new ManagedArray(Layers[prev].Activation.x, Layers[prev].Activation.y, batch.z, false);
                    var FeatureMap = new ManagedArray(Layers[l].KernelSize, Layers[l].KernelSize, false);

                    // create temp output map
                    var z     = new ManagedArray(zx, zy, zz);
                    var ztemp = new ManagedArray(zx, zy, zz, false);

                    // !!below can probably be handled by insane matrix operations
                    for (var j = 0; j < Layers[l].OutputMaps; j++) // for each output map
                    {
                        ManagedOps.Set(z, 0.0);

                        for (var i = 0; i < InputMaps; i++)
                        {
                            // copy Layers
                            ManagedOps.Copy4D3D(Activation, Layers[prev].Activation, i);
                            ManagedOps.Copy4DIJ2D(FeatureMap, Layers[l].FeatureMap, i, j);

                            // convolve with corresponding kernel and add to temp output map
                            ManagedConvolution.Valid(Activation, FeatureMap, ztemp);
                            ManagedMatrix.Add(z, ztemp);
                        }

                        // add bias, pass through nonlinearity
                        ManagedMatrix.Add(z, Layers[l].Bias[j]);
                        var sigm = ManagedMatrix.Sigm(z);
                        ManagedOps.Copy3D4D(Layers[l].Activation, sigm, j);

                        ManagedOps.Free(sigm);
                    }

                    ManagedOps.Free(Activation, FeatureMap, z, ztemp);

                    InputMaps = Layers[l].OutputMaps;
                }
                else if (Layers[l].Type == LayerTypes.Subsampling)
                {
                    // downsample

                    // generate downsampling kernel
                    var scale      = (double)(Layers[l].Scale * Layers[l].Scale);
                    var FeatureMap = new ManagedArray(Layers[l].Scale, Layers[l].Scale, false);
                    ManagedOps.Set(FeatureMap, 1.0 / scale);

                    ManagedOps.Free(Layers[l].Activation);
                    Layers[l].Activation = new ManagedArray(Layers[prev].Activation.x / Layers[l].Scale, Layers[prev].Activation.y / Layers[l].Scale, batch.z, InputMaps, 1);

                    var Activation = new ManagedArray(Layers[prev].Activation.x, Layers[prev].Activation.y, batch.z, false);
                    var z          = new ManagedArray(Layers[prev].Activation.x - Layers[l].Scale + 1, Layers[prev].Activation.y - Layers[l].Scale + 1, batch.z, false);

                    for (var j = 0; j < InputMaps; j++)
                    {
                        // copy Layers
                        ManagedOps.Copy4D3D(Activation, Layers[prev].Activation, j);

                        // Subsample
                        ManagedConvolution.Valid(Activation, FeatureMap, z);

                        if (pool)
                        {
                            ManagedOps.Pool3D4D(Layers[l].Activation, z, j, Layers[l].Scale);
                        }
                        else
                        {
                            ManagedOps.Copy3D4D(Layers[l].Activation, z, j, Layers[l].Scale);
                        }
                    }

                    ManagedOps.Free(Activation, FeatureMap, z);
                }
            }

            var MapSize = Layers[last].Activation.x * Layers[last].Activation.y;

            ManagedOps.Free(FeatureVector);
            FeatureVector = new ManagedArray(batch.z, MapSize * Layers[last].Activation.i);

            var temp1D = new ManagedArray(Layers[last].Activation.y, Layers[last].Activation.x, false);
            var temp2D = new ManagedArray(Layers[last].Activation.x, Layers[last].Activation.y, false);

            // concatenate all end layer feature maps into vector
            for (var j = 0; j < Layers[last].Activation.i; j++)
            {
                for (var ii = 0; ii < batch.z; ii++)
                {
                    // Use row-major order when flattening the feature map
                    ManagedOps.Copy4D2D(temp2D, Layers[last].Activation, ii, j);
                    ManagedMatrix.Transpose(temp1D, temp2D);
                    temp1D.Reshape(1, MapSize);
                    ManagedOps.Copy2DOffset(FeatureVector, temp1D, ii, j * MapSize);
                }
            }

            var WeightsFeatureVector = new ManagedArray(FeatureVector.x, Weights.y, false);

            ManagedMatrix.Multiply(WeightsFeatureVector, Weights, FeatureVector);
            var repmat = new ManagedArray(batch.z, Bias.Length(), false);

            ManagedMatrix.Expand(Bias, batch.z, 1, repmat);
            ManagedMatrix.Add(WeightsFeatureVector, repmat);

            // feedforward into output perceptrons
            ManagedOps.Free(Output);
            Output = ManagedMatrix.Sigm(WeightsFeatureVector);

            ManagedOps.Free(WeightsFeatureVector, repmat, temp1D, temp2D);
        }
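A minimal training-step sketch tying this example to Examples #5 and #10 (illustrative only: batch, targets, and opts are assumed to be prepared elsewhere, the pool flag selects max pooling over mean subsampling, and since ApplyGradients in Example #5 carries no access modifier, the calls are written as if made from inside the class):

        FeedForward(batch, pool: false);  // forward pass through all layers
        BackPropagation(targets);         // deltas and gradients (targets compared against Output)
        ApplyGradients(opts);             // step every layer by -opts.Alpha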