Example No. 1
            // Returns an Expression for the total loss over the sentence
            public Expression BuildLMGraph(List<int> sent, bool fApplyDropout)
            {
                // Renew the computation graph
                dy.RenewCG();

                // hidden -> word rep parameter
                Expression R = dy.parameter(p_R);
                // word bias
                Expression bias = dy.parameter(p_bias);

                // Build the collection of losses
                List<Expression> errs = new List<Expression>();

                // Start the initial state with the <s> tag
                RNNState state = builder.GetInitialState().AddInput(lp[d["<s>"]]);

                // Go through all the inputs
                for (int t = 0; t < sent.Count; t++)
                {
                    // Regular softmax
                    Expression u_t = dy.affine_transform(bias, R, state.Output());
                    errs.Add(dy.pickneglogsoftmax(u_t, sent[t]));
                    // Add the next item in
                    state = state.AddInput(dy.lookup(lp, sent[t]));
                }// next t
                // Add the last </s> tag
                Expression u_last = dy.affine_transform(bias, R, state.Output());

                errs.Add(dy.pickneglogsoftmax(u_last, d["</s>"]));

                // Sum up all the per-token losses
                return(dy.esum(errs));
            }
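
For context, a minimal training loop that consumes the loss returned by BuildLMGraph could look like the sketch below. This is not part of the original example: the Trainer type and the ScalarValue()/Backward()/Update() calls are assumptions that mirror DyNet's usual training API (ScalarValue() being the scalar counterpart of the VectorValue() call used in Example No. 3).

            // Sketch only: Trainer and its Update() method are assumed API names
            // mirroring DyNet's usual bindings
            public void TrainEpoch(List<List<int>> corpus, Trainer trainer)
            {
                foreach (List<int> sent in corpus)
                {
                    Expression loss = BuildLMGraph(sent, fApplyDropout: true);
                    float lossValue = loss.ScalarValue(); // forward pass (assumed API)
                    loss.Backward();                      // backprop through the graph (assumed API)
                    trainer.Update();                     // apply the gradient step (assumed API)
                }
            }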
Example No. 2
        private static Expression Attend(Expression inputMat, Expression w1dt, RNNState decState, ParameterGroup pg)
        {
            // We have w1dt, which is attention_dim x seq_len
            // Now concatenate the hidden layers from the decoder and multiply by w2,
            // which gives us a 1 x attention_dim vector
            Expression w2dt = pg.attention_w2 * dy.concatenate(decState.GetS());
            // Add that to each column, run through an activation layer, and those are our
            // "energies" (we have to transpose in order to get the vector dimensions)
            Expression unnormalized     = dy.transpose(pg.attention_v * dy.tanh(dy.colwise_add(w1dt, w2dt)));
            Expression attentionWeights = dy.softmax(unnormalized);

            // Apply the weights and return the new weighted ci
            return(inputMat * attentionWeights);
        }
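
The key point is that w1dt depends only on the input, so it is computed once per sentence, while Attend runs once per decoder step. The wiring below is excerpted from the decoding examples later in this listing; the shape comments are assumptions based on the dimensions described above.

        // Per-sentence setup (static across decoder steps)
        Expression inputMat = dy.concatenate_cols(encodings);      // (2*STATE_SIZE) x seq_len
        Expression w1dt     = pg.attention_w1 * inputMat;          // attention_dim x seq_len
        // Per-step call: context vector = encoder columns weighted by attention
        Expression context  = Attend(inputMat, w1dt, decState, pg);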
Example No. 3
            public void RandomSample(int maxLen = 200)
            {
                // Renew the computation graph
                dy.RenewCG();

                // hidden -> word rep parameter
                Expression R = dy.parameter(p_R);
                // word bias
                Expression bias = dy.parameter(p_bias);

                Random r = new Random();
                // Start with an <s>
                RNNState state = builder.GetInitialState().AddInput(lp[d["<s>"]]);
                int      cur = d["<s>"], len = 0;

                while (len < maxLen)
                {
                    // Regular softmax
                    Expression u_t       = dy.affine_transform(bias, R, state.Output());
                    Expression dist_expr = dy.softmax(u_t);
                    float[]    dist      = dist_expr.VectorValue();
                    // Draw a uniform random number in [0,1) and sample the next token
                    double p = r.NextDouble();
                    for (cur = 0; cur < dist.Length; cur++)
                    {
                        p -= dist[cur];
                        if (p < 0)
                        {
                            break;
                        }
                    }
                    if (cur == dist.Length)
                    {
                        cur = d["</s>"];
                    }
                    // Are we at the end?
                    if (cur == d["</s>"])
                    {
                        break;
                    }
                    len++;
                    // Output the character
                    Console.Write((len == 1 ? "" : " ") + di2W[cur]);
                }// next prediction
                Console.WriteLine();
            }
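
The inner loop above is inverse-transform sampling: subtract probability mass from a uniform draw until it crosses zero. Factored out as a standalone helper (a sketch, not part of the original source), it reads:

            // Hypothetical helper: inverse-transform sampling from a softmax distribution
            private static int Sample(float[] dist, Random r)
            {
                double p = r.NextDouble();        // uniform draw in [0, 1)
                for (int i = 0; i < dist.Length; i++)
                {
                    p -= dist[i];                 // consume probability mass
                    if (p < 0)
                    {
                        return i;                 // first index where cumulative mass exceeds the draw
                    }
                }
                return dist.Length - 1;           // guard against floating-point round-off
            }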
Example No. 4
        private static string GenerateSentence(string inputSentence, ParameterGroup pg)
        {
            dy.RenewCG();

            List<Expression> embeds    = EmbedSentence(inputSentence, pg);
            List<Expression> encodings = EncodeSentence(embeds, pg);

            // Create the matrix of all the context vectors
            Expression inputMat = dy.concatenate_cols(encodings);
            // Each attention is an activation layer on top of the sum of w1*inputMat and w2*state
            // Since w1*inputMat is static, calculate it here
            Expression w1dt = pg.attention_w1 * inputMat;

            // Create the initial state of the decoder
            RNNState decState = pg.dec_lstm.GetInitialState();

            // Run the EOS through (attend initial will be zeros)
            decState = decState.AddInput(dy.concatenate(dy.zeros(new[] { STATE_SIZE * 2 }), pg.output_lookup[pg.c2i[pg.EOS]]));

            List<string> output = new List<string>();
            Expression    prev   = pg.output_lookup[pg.c2i[pg.EOS]];

            // Go through and decode
            for (int i = 0; i < inputSentence.Length * 2; i++)
            {
                // Create the input
                Expression inputVec = dy.concatenate(Attend(inputMat, w1dt, decState, pg), prev);
                // Run through LSTM + linear layer
                decState = decState.AddInput(inputVec);
                Expression outputVec = dy.softmax(pg.decoder_W * decState.Output() + pg.decoder_b);
                // Get the predictions
                int max = Argmax(outputVec.VectorValue());
                if (max == pg.c2i[pg.EOS])
                {
                    break;
                }
                output.Add(pg.i2c[max]);
                prev = pg.output_lookup[max];
            }// next output

            return(string.Join("", output));
        }
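
Argmax is referenced above but not shown in this listing; a plausible implementation (an assumption, since the original helper is not included) is simply the index of the largest probability, i.e. greedy decoding:

        // Hypothetical helper: index of the maximum entry (greedy choice)
        private static int Argmax(float[] v)
        {
            int best = 0;
            for (int i = 1; i < v.Length; i++)
            {
                if (v[i] > v[best])
                {
                    best = i;
                }
            }
            return best;
        }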
Example No. 5
        private static Expression DecodeSentence(List<Expression> encodings, string outputSentence, ParameterGroup pg)
        {
            // Pad the output *only at the end* with EOS
            List<string> output = outputSentence.Select(c => c.ToString()).ToList();

            output.Add(pg.EOS);

            // Create the matrix of all the context vectors
            Expression inputMat = dy.concatenate_cols(encodings);
            // Each attention is an activation layer on top of the sum of w1*inputMat and w2*state
            // Since w1*inputMat is static, calculate it here
            Expression w1dt = pg.attention_w1 * inputMat;

            // Create the initial state of the decoder
            RNNState decState = pg.dec_lstm.GetInitialState();

            // Run the EOS through (attend initial will be zeros)
            decState = decState.AddInput(dy.concatenate(dy.zeros(new[] { STATE_SIZE * 2 }), pg.output_lookup[pg.c2i[pg.EOS]]));

            List<Expression> losses = new List<Expression>();
            // Go through and decode
            Expression prev = pg.output_lookup[pg.c2i[pg.EOS]];

            foreach (string outS in output)
            {
                // Create the input
                Expression inputVec = dy.concatenate(Attend(inputMat, w1dt, decState, pg), prev);
                // Run through LSTM + linear layer
                decState = decState.AddInput(inputVec);
                Expression outputVec = dy.softmax(pg.decoder_W * decState.Output() + pg.decoder_b);
                // Loss & next
                losses.Add(-dy.log(dy.pick(outputVec, pg.c2i[outS])));
                prev = pg.output_lookup[pg.c2i[outS]];
            }// next output

            return(dy.sum(losses));
        }
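
For completeness, a training step for this encoder-decoder chains the helpers above into a single loss. A minimal sketch, assuming EmbedSentence/EncodeSentence behave as in Example No. 4 and a DyNet-style trainer with ScalarValue()/Backward()/Update() (assumed API names, not confirmed by this listing):

        // Sketch only: Trainer and its calls are assumptions mirroring DyNet's usual bindings
        private static float TrainStep(string inputSentence, string outputSentence, ParameterGroup pg, Trainer trainer)
        {
            dy.RenewCG();
            List<Expression> embeds    = EmbedSentence(inputSentence, pg);
            List<Expression> encodings = EncodeSentence(embeds, pg);
            Expression loss = DecodeSentence(encodings, outputSentence, pg);
            float lossValue = loss.ScalarValue(); // forward pass (assumed API)
            loss.Backward();                      // backprop (assumed API)
            trainer.Update();                     // parameter update (assumed API)
            return lossValue;
        }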