Beispiel #1
0
        public int[][] ProcessNBest(Sentence sent, int nbest)
        {
            if (rnn.IsCRFTraining == false)
            {
                throw new ArgumentException("N-best result is only for RNN-CRF model.");
            }

            Sequence seq = Featurizer.ExtractFeatures(sent);

            int[][] predicted = rnn.DecodeNBestCRF(seq, nbest);

            return(predicted);
        }
Beispiel #2
0
        static void LoadDataset(string strFileName, Featurizer featurizer, DataSet dataSet)
        {
            CheckCorpus(strFileName);

            StreamReader sr = new StreamReader(strFileName);
            int RecordCount = 0;

            while (true)
            {
                //Extract features from it and convert it into sequence
                Sentence sent = new Sentence(ReadRecord(sr));
                if (sent.TokensList.Count <= 2)
                {
                    //No more record, it only contain <s> and </s>
                    break;
                }

                Sequence seq = featurizer.ExtractFeatures(sent);

                //Set label for the sequence
                seq.SetLabel(sent, featurizer.TagSet);

                //Add the sequence into data set
                dataSet.SequenceList.Add(seq);

                //Show state at every 1000 record
                RecordCount++;
                if (RecordCount % 10000 == 0)
                {
                    Logger.WriteLine(Logger.Level.info, "{0}...", RecordCount);
                }
            }

            sr.Close();

        }
Beispiel #3
0
        static void LoadDataset(string strFileName, Featurizer featurizer, DataSet dataSet)
        {
            CheckCorpus(strFileName);

            StreamReader sr = new StreamReader(strFileName);
            int RecordCount = 0;

            while (true)
            {
                List<string> tokenList = ReadRecord(sr);
                if (tokenList.Count == 0)
                {
                    //No more record
                    break;
                }

                //Extract features from it and convert it into sequence
                Sentence sent = new Sentence();
                sent.SetFeatures(tokenList);
                Sequence seq = featurizer.ExtractFeatures(sent);

                //Set label for the sequence
                if (seq.SetLabel(sent, featurizer.GetTagSet()) == false)
                {
                    Console.WriteLine("Error: Invalidated record.");
                    sent.DumpFeatures();
                    continue;
                }

                //Add the sequence into data set
                dataSet.Add(seq);

                //Show state at every 1000 record
                RecordCount++;
                if (RecordCount % 10000 == 0)
                {
                    Console.Write("{0}...", RecordCount);
                }
            }

            Console.WriteLine();

            sr.Close();
        }
Beispiel #4
0
        public override int[] TestSeq2Seq(Sentence srcSentence, Featurizer featurizer)
        {
            State curState = featurizer.ExtractFeatures(new string[] { "<s>" });

            curState.Label = featurizer.TagSet.GetIndex("<s>");

            //Reset all layers
            foreach (SimpleLayer layer in HiddenLayerList)
            {
                layer.netReset(false);
            }

            //Extract features from source sentence
            Sequence srcSequence = featurizer.AutoEncoder.Featurizer.ExtractFeatures(srcSentence);

            double[] srcHiddenAvgOutput;
            Dictionary <int, float> srcSparseFeatures;

            ExtractSourceSentenceFeature(featurizer.AutoEncoder, srcSequence, curState.SparseFeature.Length, out srcHiddenAvgOutput, out srcSparseFeatures);

            int        numLayers = HiddenLayerList.Count;
            List <int> predicted = new List <int>();

            predicted.Add(curState.Label);
            while (true)
            {
                //Build sparse features
                SparseVector sparseVector = new SparseVector();
                sparseVector.SetLength(curState.SparseFeature.Length + srcSequence.SparseFeatureSize);
                sparseVector.AddKeyValuePairData(curState.SparseFeature);
                sparseVector.AddKeyValuePairData(srcSparseFeatures);

                //Compute first layer
                double[] denseFeatures = RNNHelper.ConcatenateVector(curState.DenseFeature, srcHiddenAvgOutput);
                HiddenLayerList[0].computeLayer(sparseVector, denseFeatures, false);

                //Compute middle layers
                for (int i = 1; i < numLayers; i++)
                {
                    //We use previous layer's output as dense feature for current layer
                    denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].cellOutput, srcHiddenAvgOutput);
                    HiddenLayerList[i].computeLayer(sparseVector, denseFeatures, false);
                }

                //Compute output layer
                denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].cellOutput, srcHiddenAvgOutput);
                OutputLayer.computeLayer(sparseVector, denseFeatures, false);

                OutputLayer.Softmax(false);

                int    nextTagId = OutputLayer.GetBestOutputIndex(false);
                string nextWord  = featurizer.TagSet.GetTagName(nextTagId);

                curState       = featurizer.ExtractFeatures(new string[] { nextWord });
                curState.Label = nextTagId;

                predicted.Add(nextTagId);

                if (nextWord == "</s>" || predicted.Count >= 100)
                {
                    break;
                }
            }

            return(predicted.ToArray());
        }