/// <summary>
/// Creates a <see cref="Sequence"/> with one state per token of the sentence and
/// fills in the sparse and dense features of every state.
/// </summary>
/// <param name="sentence">Sentence whose <c>TokensList</c> supplies the tokens.</param>
/// <returns>The populated sequence.</returns>
public Sequence BuildSequence(Sentence sentence)
{
    var tokenCount = sentence.TokensList.Count;
    var result = new Sequence(tokenCount);

    // Sparse features are extracted for every token before any dense feature is set.
    for (var pos = 0; pos < tokenCount; pos++)
    {
        var current = result.States[pos];
        ExtractSparseFeature(pos, tokenCount, sentence.TokensList, current);
    }

    if (preTrainType != RNNSharp.PRETRAIN_TYPE.AutoEncoder)
    {
        // No auto-encoder configured: dense features are extracted per token.
        for (var pos = 0; pos < tokenCount; pos++)
        {
            var current = result.States[pos];
            current.DenseFeature = ExtractDenseFeature(pos, tokenCount, sentence.TokensList);
        }

        return result;
    }

    // Auto-encoder configured: its top hidden layer output for each token is the dense feature.
    var encoderOutputs = autoEncoder.ComputeTopHiddenLayerOutput(sentence);
    for (var pos = 0; pos < tokenCount; pos++)
    {
        var current = result.States[pos];
        current.DenseFeature = new SingleVector(encoderOutputs[pos]);
    }

    return result;
}
/// <summary>
/// Extracts dense and sparse features from the source sequence.
/// </summary>
/// <param name="decoder">Decoder whose <c>ComputeTopHiddenLayerOutput</c> is run on the source sequence.</param>
/// <param name="srcSequence">Source sequence to extract features from.</param>
/// <param name="targetSparseFeatureSize">Offset added to every source sparse feature key.</param>
/// <param name="srcHiddenAvgOutput">
/// Element-wise average of the first and the last entry of the decoder's top hidden layer outputs.
/// </param>
/// <param name="srcSparseFeatures">
/// Sparse features of all source states, keyed by the offset index; values sharing a key are summed.
/// </param>
private void ExtractSourceSentenceFeature(RNNDecoder decoder, Sequence srcSequence, int targetSparseFeatureSize, out double[] srcHiddenAvgOutput, out Dictionary<int, float> srcSparseFeatures)
{
    List<double[]> srcOutputs = decoder.ComputeTopHiddenLayerOutput(srcSequence);

    // Look the two rows up once instead of re-indexing the list on every iteration.
    double[] firstOutput = srcOutputs[0];
    double[] lastOutput = srcOutputs[srcOutputs.Count - 1];

    srcHiddenAvgOutput = new double[firstOutput.Length];
    for (int i = 0; i < firstOutput.Length; i++)
    {
        srcHiddenAvgOutput[i] = (firstOutput[i] + lastOutput[i]) / 2.0;
    }

    srcSparseFeatures = new Dictionary<int, float>();
    for (int i = 0; i < srcSequence.States.Length; i++)
    {
        foreach (KeyValuePair<int, float> kv in srcSequence.States[i].SparseFeature)
        {
            int srcSparseFeatureIndex = kv.Key + targetSparseFeatureSize;

            // TryGetValue avoids the ContainsKey + indexer double lookup.
            if (srcSparseFeatures.TryGetValue(srcSparseFeatureIndex, out float accumulated))
            {
                srcSparseFeatures[srcSparseFeatureIndex] = accumulated + kv.Value;
            }
            else
            {
                srcSparseFeatures.Add(srcSparseFeatureIndex, kv.Value);
            }
        }
    }
}
/// <summary>
/// Extract features from source sequence
/// </summary>
/// <param name="decoder">Decoder whose <c>ComputeTopHiddenLayerOutput</c> is run on the source sequence.</param>
/// <param name="srcSequence">Source sequence to extract features from.</param>
/// <param name="targetSparseFeatureSize">Offset added to every source sparse feature key.</param>
/// <param name="srcHiddenAvgOutput">
/// Receives the first entry of the decoder's top hidden layer outputs followed by the last entry,
/// so its length is twice the size of one entry. Despite the name, the two entries are
/// concatenated, not averaged.
/// </param>
/// <param name="srcSparseFeatures">
/// Sparse features of all source states, keyed by the offset index; values sharing a key are summed.
/// </param>
private void ExtractSourceSentenceFeature(RNNDecoder decoder, Sequence srcSequence, int targetSparseFeatureSize, out float[] srcHiddenAvgOutput, out Dictionary<int, float> srcSparseFeatures)
{
    //Extract dense features from source sequence
    var srcOutputs = decoder.ComputeTopHiddenLayerOutput(srcSequence);
    float[] srcOutputForward = srcOutputs[0];
    float[] srcOutputBackward = srcOutputs[srcOutputs.Count - 1];
    int srcSequenceDenseFeatureSize = srcOutputForward.Length;

    // Plain block copies replace the hand-rolled Vector<float> loop and its scalar tail:
    // first entry goes to the lower half of the buffer, last entry to the upper half.
    srcHiddenAvgOutput = new float[srcSequenceDenseFeatureSize * 2];
    System.Array.Copy(srcOutputForward, 0, srcHiddenAvgOutput, 0, srcSequenceDenseFeatureSize);
    System.Array.Copy(srcOutputBackward, 0, srcHiddenAvgOutput, srcSequenceDenseFeatureSize, srcSequenceDenseFeatureSize);

    //Extract sparse features from source sequence
    srcSparseFeatures = new Dictionary<int, float>();
    for (var i = 0; i < srcSequence.States.Length; i++)
    {
        foreach (var kv in srcSequence.States[i].SparseFeature)
        {
            var srcSparseFeatureIndex = kv.Key + targetSparseFeatureSize;

            // TryGetValue avoids the ContainsKey + indexer double lookup.
            if (srcSparseFeatures.TryGetValue(srcSparseFeatureIndex, out float accumulated))
            {
                srcSparseFeatures[srcSparseFeatureIndex] = accumulated + kv.Value;
            }
            else
            {
                srcSparseFeatures.Add(srcSparseFeatureIndex, kv.Value);
            }
        }
    }
}