// Returns an Expression for the total loss over the sentence
public Expression BuildLMGraph(List<int> sent, bool fApplyDropout)
{
    // Renew the computation graph
    dy.RenewCG();

    // hidden -> word rep parameter
    Expression R = dy.parameter(p_R);
    // word bias
    Expression bias = dy.parameter(p_bias);

    // Build the collection of losses
    List<Expression> errs = new List<Expression>();

    // Start the initial state with a <s> tag
    RNNState state = builder.GetInitialState().AddInput(lp[d["<s>"]]);

    // Go through all the inputs
    for (int t = 0; t < sent.Count; t++)
    {
        // Regular softmax
        Expression u_t = dy.affine_transform(bias, R, state.Output());
        errs.Add(dy.pickneglogsoftmax(u_t, sent[t]));
        // Feed the next item in
        state = state.AddInput(dy.lookup(lp, sent[t]));
    } // next t

    // Add the loss for the final </s> tag
    Expression u_last = dy.affine_transform(bias, R, state.Output());
    errs.Add(dy.pickneglogsoftmax(u_last, d["</s>"]));

    // Return the sum of the per-word losses
    return dy.esum(errs);
}
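To train the language model, each sentence builds a fresh graph through BuildLMGraph and the resulting loss drives one parameter update. The loop below is a minimal sketch, not code from the original: trainingCorpus and lm are hypothetical names, trainer is assumed to be something like a dy.SimpleSGDTrainer over the model's parameters, and ScalarValue()/Backward()/Update() are assumed to mirror DyNet's usual forward/backward/update calls.

foreach (List<int> sent in trainingCorpus) // hypothetical corpus variable
{
    // BuildLMGraph calls dy.RenewCG() itself, so each sentence gets a fresh graph
    Expression loss = lm.BuildLMGraph(sent, fApplyDropout: true);
    float lossValue = loss.ScalarValue(); // forward pass (accessor name assumed)
    loss.Backward();                      // backpropagate through the unrolled RNN
    trainer.Update();                     // apply the gradient step
}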
private static string GenerateSentence(string inputSentence, ParameterGroup pg)
{
    dy.RenewCG();

    List<Expression> embeds = EmbedSentence(inputSentence, pg);
    List<Expression> encodings = EncodeSentence(embeds, pg);

    // Create the matrix of all the context vectors
    Expression inputMat = dy.concatenate_cols(encodings);
    // Each attention score is an activation layer on top of the sum of w1*inputMat and w2*state.
    // Since w1*inputMat is static, calculate it once here
    Expression w1dt = pg.attention_w1 * inputMat;

    // Create the initial state of the decoder
    RNNState decState = pg.dec_lstm.GetInitialState();
    // Run the EOS through (the initial attention context is all zeros)
    decState = decState.AddInput(dy.concatenate(dy.zeros(new[] { STATE_SIZE * 2 }), pg.output_lookup[pg.c2i[pg.EOS]]));

    List<string> output = new List<string>();
    Expression prev = pg.output_lookup[pg.c2i[pg.EOS]];
    // Go through and decode, capping the output at twice the input length
    for (int i = 0; i < inputSentence.Length * 2; i++)
    {
        // Create the input: attention context concatenated with the previous output
        Expression inputVec = dy.concatenate(Attend(inputMat, w1dt, decState, pg), prev);
        // Run through the LSTM + linear layer
        decState = decState.AddInput(inputVec);
        Expression outputVec = dy.softmax(pg.decoder_W * decState.Output() + pg.decoder_b);
        // Take the greedy prediction; stop at EOS
        int max = Argmax(outputVec.VectorValue());
        if (max == pg.c2i[pg.EOS]) { break; }
        output.Add(pg.i2c[max]);
        prev = pg.output_lookup[max];
    } // next output

    return string.Join("", output);
}
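Both GenerateSentence and DecodeSentence call an Attend helper that is not shown above. For reference, here is a minimal sketch of what it computes, modeled on DyNet's standard attention example rather than taken from the original: it assumes pg.attention_w2 and pg.attention_v exist alongside pg.attention_w1, and that the binding exposes RNNState.S() (the list of state vectors) and dy.colwise_add as DyNet does.

private static Expression Attend(Expression inputMat, Expression w1dt, RNNState state, ParameterGroup pg)
{
    // Dynamic half of the attention input: w2 * (current decoder state vectors)
    Expression w2dt = pg.attention_w2 * dy.concatenate(state.S());
    // Unnormalized score per input column: v * tanh(w1*input + w2*state)
    Expression unnormalized = dy.transpose(pg.attention_v * dy.tanh(dy.colwise_add(w1dt, w2dt)));
    // Normalize the scores into a distribution over input positions
    Expression attWeights = dy.softmax(unnormalized);
    // Context vector: weighted sum of the encoder columns
    return inputMat * attWeights;
}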
private static Expression DecodeSentence(List<Expression> encodings, string outputSentence, ParameterGroup pg)
{
    // Pad the output *only at the end* with EOS
    List<string> output = outputSentence.Select(c => c.ToString()).ToList();
    output.Add(pg.EOS);

    // Create the matrix of all the context vectors
    Expression inputMat = dy.concatenate_cols(encodings);
    // Each attention score is an activation layer on top of the sum of w1*inputMat and w2*state.
    // Since w1*inputMat is static, calculate it once here
    Expression w1dt = pg.attention_w1 * inputMat;

    // Create the initial state of the decoder
    RNNState decState = pg.dec_lstm.GetInitialState();
    // Run the EOS through (the initial attention context is all zeros)
    decState = decState.AddInput(dy.concatenate(dy.zeros(new[] { STATE_SIZE * 2 }), pg.output_lookup[pg.c2i[pg.EOS]]));

    List<Expression> losses = new List<Expression>();
    // Go through and decode, feeding the gold character at each step (teacher forcing)
    Expression prev = pg.output_lookup[pg.c2i[pg.EOS]];
    foreach (string outS in output)
    {
        // Create the input: attention context concatenated with the previous gold output
        Expression inputVec = dy.concatenate(Attend(inputMat, w1dt, decState, pg), prev);
        // Run through the LSTM + linear layer
        decState = decState.AddInput(inputVec);
        Expression outputVec = dy.softmax(pg.decoder_W * decState.Output() + pg.decoder_b);
        // Accumulate the loss for the gold character, then feed it in as the next input
        losses.Add(-dy.log(dy.pick(outputVec, pg.c2i[outS])));
        prev = pg.output_lookup[pg.c2i[outS]];
    } // next output

    return dy.sum(losses);
}
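A full training step ties the three stages together: embed the input, encode it, and score the gold output with DecodeSentence. The sketch below is hypothetical, not from the original: the Trainer type name and the ScalarValue()/Backward()/Update() calls are assumed to mirror DyNet's usual training API.

private static float TrainStep(string input, string gold, ParameterGroup pg, Trainer trainer)
{
    // Fresh computation graph per example
    dy.RenewCG();
    List<Expression> embeds = EmbedSentence(input, pg);
    List<Expression> encodings = EncodeSentence(embeds, pg);
    Expression loss = DecodeSentence(encodings, gold, pg);
    float lossValue = loss.ScalarValue(); // forward pass
    loss.Backward();                      // backward pass
    trainer.Update();                     // update parameters
    return lossValue;
}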