/// <summary>
/// Extracts features from <paramref name="sent"/> and asks the model for its
/// <paramref name="nbest"/> best CRF decodings.
/// </summary>
/// <param name="sent">Sentence to decode.</param>
/// <param name="nbest">Number of candidate results requested from the decoder.</param>
/// <returns>The array returned by <c>rnn.DecodeNBestCRF</c> (presumably one label-id row per candidate; confirm against the decoder).</returns>
/// <exception cref="ArgumentException">Thrown when <c>rnn.IsCRFTraining</c> is false.</exception>
public int[][] ProcessNBest(Sentence sent, int nbest)
{
    // N-best decoding is rejected up front unless the model is flagged as CRF.
    if (!rnn.IsCRFTraining)
    {
        throw new ArgumentException("N-best result is only for RNN-CRF model.");
    }

    Sequence features = Featurizer.ExtractFeatures(sent);
    return rnn.DecodeNBestCRF(features, nbest);
}
/// <summary>
/// Reads records from a corpus file, converts each one into a labeled
/// <c>Sequence</c> and appends it to <paramref name="dataSet"/>.
/// </summary>
/// <param name="strFileName">Path of the corpus file; it is passed to <c>CheckCorpus</c> before reading.</param>
/// <param name="featurizer">Supplies feature extraction and the tag set used for labeling.</param>
/// <param name="dataSet">Data set whose <c>SequenceList</c> receives the loaded sequences.</param>
static void LoadDataset(string strFileName, Featurizer featurizer, DataSet dataSet)
{
    CheckCorpus(strFileName);

    // 'using' guarantees the reader is closed even if reading or feature
    // extraction throws part-way through the corpus.
    using (StreamReader sr = new StreamReader(strFileName))
    {
        int recordCount = 0;

        while (true)
        {
            // Extract features from the record and convert it into a sequence
            Sentence sent = new Sentence(ReadRecord(sr));
            if (sent.TokensList.Count <= 2)
            {
                // No more records: the sentence only contains <s> and </s>
                break;
            }

            Sequence seq = featurizer.ExtractFeatures(sent);

            // Set label for the sequence
            seq.SetLabel(sent, featurizer.TagSet);

            // Add the sequence into the data set
            dataSet.SequenceList.Add(seq);

            // Report progress every 10000 records
            recordCount++;
            if (recordCount % 10000 == 0)
            {
                Logger.WriteLine(Logger.Level.info, "{0}...", recordCount);
            }
        }
    }
}
/// <summary>
/// Reads records from a corpus file until an empty record is returned,
/// converts each one into a labeled <c>Sequence</c> and adds it to
/// <paramref name="dataSet"/>. Records whose labels cannot be set are
/// reported on the console, dumped and skipped.
/// </summary>
/// <param name="strFileName">Path of the corpus file; it is passed to <c>CheckCorpus</c> before reading.</param>
/// <param name="featurizer">Supplies feature extraction and the tag set used for labeling.</param>
/// <param name="dataSet">Data set that receives the loaded sequences.</param>
static void LoadDataset(string strFileName, Featurizer featurizer, DataSet dataSet)
{
    CheckCorpus(strFileName);

    // 'using' guarantees the reader is closed even if reading or feature
    // extraction throws part-way through the corpus.
    using (StreamReader sr = new StreamReader(strFileName))
    {
        int recordCount = 0;

        while (true)
        {
            List<string> tokenList = ReadRecord(sr);
            if (tokenList.Count == 0)
            {
                // No more records
                break;
            }

            // Extract features from the record and convert it into a sequence
            Sentence sent = new Sentence();
            sent.SetFeatures(tokenList);
            Sequence seq = featurizer.ExtractFeatures(sent);

            // Set label for the sequence; skip the record when labeling fails
            if (!seq.SetLabel(sent, featurizer.GetTagSet()))
            {
                Console.WriteLine("Error: Invalidated record.");
                sent.DumpFeatures();
                continue;
            }

            // Add the sequence into the data set
            dataSet.Add(seq);

            // Report progress every 10000 records (skipped records are not counted)
            recordCount++;
            if (recordCount % 10000 == 0)
            {
                Console.Write("{0}...", recordCount);
            }
        }

        // Terminate the progress line written with Console.Write above
        Console.WriteLine();
    }
}
/// <summary>
/// Greedily generates a sequence of tag ids for <paramref name="srcSentence"/>.
/// Decoding starts from the "&lt;s&gt;" tag and, at every step, feeds the tag
/// picked by the output layer back in as the next input. It stops once
/// "&lt;/s&gt;" is produced or the result holds 100 ids.
/// </summary>
/// <param name="srcSentence">Source sentence; its features are extracted with the auto-encoder's featurizer.</param>
/// <param name="featurizer">Provides the tag set, per-token feature extraction and the auto-encoder.</param>
/// <returns>The generated tag ids, beginning with the id of "&lt;s&gt;".</returns>
public override int[] TestSeq2Seq(Sentence srcSentence, Featurizer featurizer)
{
    // Upper bound on the number of ids returned (including the leading "<s>").
    const int maxPredictedCount = 100;

    // Seed the decoder with the sentence-start token.
    State state = featurizer.ExtractFeatures(new string[] { "<s>" });
    state.Label = featurizer.TagSet.GetIndex("<s>");

    // Reset all hidden layers before decoding.
    foreach (SimpleLayer hiddenLayer in HiddenLayerList)
    {
        hiddenLayer.netReset(false);
    }

    // Extract the source-sentence features that are appended to every step's input.
    Sequence sourceSeq = featurizer.AutoEncoder.Featurizer.ExtractFeatures(srcSentence);
    double[] sourceDense;
    Dictionary<int, float> sourceSparse;
    ExtractSourceSentenceFeature(
        featurizer.AutoEncoder,
        sourceSeq,
        state.SparseFeature.Length,
        out sourceDense,
        out sourceSparse);

    int layerCount = HiddenLayerList.Count;
    List<int> result = new List<int>();
    result.Add(state.Label);

    string word;
    do
    {
        // Sparse input: current token's features followed by the source-sentence features.
        SparseVector sparseInput = new SparseVector();
        sparseInput.SetLength(state.SparseFeature.Length + sourceSeq.SparseFeatureSize);
        sparseInput.AddKeyValuePairData(state.SparseFeature);
        sparseInput.AddKeyValuePairData(sourceSparse);

        // First hidden layer takes the current token's dense features.
        double[] denseInput = RNNHelper.ConcatenateVector(state.DenseFeature, sourceDense);
        HiddenLayerList[0].computeLayer(sparseInput, denseInput, false);

        // Each later hidden layer takes the previous layer's output as dense input.
        for (int layerIdx = 1; layerIdx < layerCount; layerIdx++)
        {
            denseInput = RNNHelper.ConcatenateVector(HiddenLayerList[layerIdx - 1].cellOutput, sourceDense);
            HiddenLayerList[layerIdx].computeLayer(sparseInput, denseInput, false);
        }

        // Output layer consumes the last hidden layer's output.
        denseInput = RNNHelper.ConcatenateVector(HiddenLayerList[layerCount - 1].cellOutput, sourceDense);
        OutputLayer.computeLayer(sparseInput, denseInput, false);
        OutputLayer.Softmax(false);

        // Take the best-scoring tag and feed it back as the next input token.
        int tagId = OutputLayer.GetBestOutputIndex(false);
        word = featurizer.TagSet.GetTagName(tagId);

        state = featurizer.ExtractFeatures(new string[] { word });
        state.Label = tagId;

        result.Add(tagId);
    }
    while (word != "</s>" && result.Count < maxPredictedCount);

    return result.ToArray();
}