Esempio n. 1
0
        /// <summary>
        /// Assembles a convolutional classifier graph: two convolution/activation/pooling
        /// stages followed by two fully connected layers, trained with a softmax
        /// cross-entropy loss against <c>labels</c>.
        /// </summary>
        /// <returns>
        /// A <see cref="Model"/> carrying the loss layer together with the image and
        /// label input variables that feed it.
        /// </returns>
        public static Model ConvolutionalNeuralNetworkModel()
        {
            var images = Variable<float>();
            var labels = Variable<float>();

            // View the raw input as 4-d with an open (-1) leading dimension.
            // NOTE(review): presumably (batch, channel, height, width) for 28x28 grayscale images - confirm.
            ILayer<float> input = new Reshape<float>(images, PartialShape.Create(-1, 1, 28, 28));

            // Stage 1: convolution -> ReLU -> max pooling.
            // NOTE(review): arguments look like a 5x5 kernel with 16 filters and a 2x2 window with stride 2 - confirm.
            ILayer<float> conv1 = new Convolution2D<float>(input.Output, 5, 5, 16);
            ILayer<float> relu1 = new ActivationReLU<float>(conv1.Output);
            ILayer<float> pool1 = new Pooling2D<float>(relu1.Output, PoolingMode.MAX, 2, 2, 2, 2);

            // Stage 2: convolution -> tanh -> max pooling.
            ILayer<float> conv2 = new Convolution2D<float>(pool1.Output, 5, 5, 32);
            ILayer<float> tanh2 = new ActivationTanh<float>(conv2.Output);
            ILayer<float> pool2 = new Pooling2D<float>(tanh2.Output, PoolingMode.MAX, 2, 2, 2, 2);

            // Collapse every dimension except the first into one, so the dense layers see a 2-d input.
            var flattenedSize = pool2.Output.Shape.Skip(1).Aggregate(ScalarOps.Mul);
            ILayer<float> flattened = new Reshape<float>(pool2.Output, PartialShape.Create(-1, flattenedSize));

            // Classifier head: 50 hidden units with tanh, then 10 outputs.
            ILayer<float> hidden = new FullyConnected<float>(flattened.Output, 50);
            ILayer<float> hiddenTanh = new ActivationTanh<float>(hidden.Output);
            ILayer<float> logits = new FullyConnected<float>(hiddenTanh.Output, 10);

            var loss = new SoftmaxCrossEntropy<float>(logits.Output, labels);

            return new Model
            {
                Loss = loss,
                Images = images,
                Labels = labels
            };
        }
Esempio n. 2
0
            /// <summary>
            /// Builds the symbolic graph of an attention layer: encoder and decoder hidden
            /// states are projected into a shared space of size <paramref name="attentionDim"/>,
            /// combined through tanh, scored against a learned vector, normalised with a
            /// softmax and finally used to reduce the encoder states into <c>Output</c>.
            /// </summary>
            /// <param name="encoderHiddenStates">
            /// Rank-3 variable laid out as (encoderSeqLength, batch, encoderHiddenSize).
            /// The batch and hidden-size dimensions must be known (&gt; 0).
            /// </param>
            /// <param name="decoderHiddenStates">
            /// Rank-2 variable laid out as (batch, decoderHiddenSize). The hidden size must be
            /// known (&gt; 0) and the batch dimension must equal the encoder's batch dimension.
            /// </param>
            /// <param name="attentionDim">Size of the shared projection space (K in the comments below).</param>
            public Attention(Variable<T> encoderHiddenStates, Variable<T> decoderHiddenStates, long attentionDim)
            {
                AttentionDim = attentionDim;
                EncoderHiddenStates = encoderHiddenStates;
                DecoderHiddenStates = decoderHiddenStates;

                // Design goal: keep the encoder sequence length unknown at the symbolic layer so a
                // single graph / sub-executor built by the outer LSTM op can be reused across
                // different sequence lengths. Only the hidden sizes (and, further down, the batch
                // size) have to be fixed here.
                Util.EnsureEqual(3, EncoderHiddenStates.Shape.Rank, "states layout: (encoderSeqLength, batch, encoderHiddenSize)");
                Util.EnsureTrue(EncoderHiddenStates.Shape[2] > 0, "states should be determined.");
                EncoderHiddenSize = EncoderHiddenStates.Shape[2];

                Util.EnsureEqual(2, DecoderHiddenStates.Shape.Rank, "DecoderHiddenStates layout: (batch, decoderHiddenSize)");
                Util.EnsureTrue(DecoderHiddenStates.Shape[1] > 0, "DecoderHiddenStates should be determined.");
                DecoderHiddenSize = DecoderHiddenStates.Shape[1];

                // Parameter initialisation: each weight is RandomUniform minus 0.5, multiplied by
                // sqrt(12 / (sum of the two weight dimensions)); V uses sqrt(12 / AttentionDim).

                // Wh: (He, K) projection applied to the encoder states.
                var encoderFan = (double)(AttentionDim + EncoderHiddenSize);
                var scaleWh = Sqrt(12.0.AsScalar<T>() / encoderFan.AsScalar<T>());
                var centeredWh = RandomUniform<T>(Shape.Create(EncoderHiddenSize, AttentionDim), 0UL, 0UL) - 0.5.AsScalar<T>();
                Wh = Parameter(scaleWh * centeredWh);

                // Wd: (Hd, K) projection applied to the decoder state.
                var decoderFan = (double)(AttentionDim + DecoderHiddenSize);
                var scaleWd = Sqrt(12.0.AsScalar<T>() / decoderFan.AsScalar<T>());
                var centeredWd = RandomUniform<T>(Shape.Create(DecoderHiddenSize, AttentionDim), 0UL, 0UL) - 0.5.AsScalar<T>();
                Wd = Parameter(scaleWd * centeredWd);

                // V: (K, 1) vector that turns each combined projection into a scalar score.
                var scoreFan = (double)(AttentionDim);
                var scaleV = Sqrt(12.0.AsScalar<T>() / scoreFan.AsScalar<T>());
                var centeredV = RandomUniform<T>(Shape.Create(AttentionDim, 1), 0UL, 0UL) - 0.5.AsScalar<T>();
                V = Parameter(scaleV * centeredV);

                // ---- graph construction ----
                // Notation: n = encoder sequence length, b = batch, He / Hd = encoder / decoder
                // hidden size, K = attention dimension.
                var encoderStates = EncoderHiddenStates;                // (n,b,He)
                var decoderState = DecoderHiddenStates;                 // (b,Hd)

                var flatEncoderStates = encoderStates.Reshape(-1, EncoderHiddenSize); // (n*b,He)
                var encoderProjection = Dot(flatEncoderStates, Wh);     // (n*b,K)
                var decoderProjection = Dot(decoderState, Wd);          // (b,K)

                // Adding (n*b,K) and (b,K) needs a broadcast, which requires knowing n or b.
                // b is the one fixed at the symbolic layer: that leaves n (the encoder sequence
                // length) flexible, which makes bucketing easier.
                // The backward pass of add reportedly has trouble with broadcast on 3-d arrays,
                // so the addition is carried out on 2-d tensors instead:
                //   (n*b,K) -> (n,b*K)   and   (b,K) -> (b*K), which broadcasts as (1,b*K)
                //   (n,b*K) + (b*K) = (n,b*K)
                //   result reshaped back to (n*b,K)
                BatchSize = EncoderHiddenStates.Shape[1];
                Util.EnsureTrue(BatchSize > 0, "Batch need to be determined.");
                Util.EnsureTrue(BatchSize == DecoderHiddenStates.Shape[0]);

                var encoderPerStep = encoderProjection.Reshape(-1, BatchSize * AttentionDim); // (n,b*K)
                var decoderFlat = decoderProjection.Reshape(-1);                              // (b*K)
                var combined = (encoderPerStep + decoderFlat).Reshape(-1, AttentionDim);      // (n*b,K)

                // tanh keeps the shape: (n*b,K)
                var activated = new ActivationTanh<T>(combined);

                // (n*b,K) dot (K,1) = (n*b,1), viewed as (n,b)
                var flatScores = Dot(activated.Output, V);
                var scores = flatScores.Reshape(-1, BatchSize);

                // softmax output has the same shape: (n,b)
                var weights = new Softmax<T>(scores);

                // Combine the (n,b) weights with the (n,b,He) encoder states.
                // NOTE(review): presumably a weighted sum over n - confirm against AttentionReduce.
                var weightsByStep = weights.Output.Reshape(-1, BatchSize);
                var context = new AttentionReduce<T>(weightsByStep, encoderStates);

                Output = context.Output;
            }