Example #1
0
        /// <summary>
        /// Reads source/target sentence pairs from <paramref name="strFileName"/>, converts each
        /// pair into a sequence pair via <paramref name="featurizer"/>, and appends every pair whose
        /// target labels could be set to <paramref name="dataSet"/>.
        /// </summary>
        /// <param name="strFileName">Path of the corpus file to read.</param>
        /// <param name="featurizer">Supplies <c>ExtractFeatures</c> and the <c>TagSet</c> used for labeling.</param>
        /// <param name="dataSet">Data set whose <c>SequenceList</c> receives the accepted sequence pairs.</param>
        /// <remarks>
        /// Records are consumed two at a time: the first becomes the source sentence, the second the
        /// target sentence. Reading stops at the first pair whose source has at most two tokens or
        /// whose target has no tokens. Pairs for which <c>SetLabel</c> returns false are skipped and
        /// are not counted.
        /// </remarks>
        private static void LoadSeq2SeqDataSet(string strFileName, Config featurizer, DataSet <SequencePair> dataSet)
        {
            // How many accepted records pass between two progress messages.
            const int progressInterval = 10000;

            Logger.WriteLine("Loading data set for seq2seq2 training...");

            // The using statement guarantees the reader is released even when feature
            // extraction or labeling throws part-way through the file.
            using (var sr = new StreamReader(strFileName))
            {
                var recordCount = 0;

                while (true)
                {
                    // Initializers run in order, so the source record is read before the target record.
                    // NOTE(review): the 'false' passed for the target sentence presumably disables
                    // something the source sentence gets by default - confirm against Sentence's constructor.
                    var sentPair = new SentencePair
                    {
                        srcSentence = new Sentence(ReadRecord(sr)),
                        tgtSentence = new Sentence(ReadRecord(sr), false)
                    };

                    if (sentPair.srcSentence.TokensList.Count <= 2 || sentPair.tgtSentence.TokensList.Count <= 0)
                    {
                        // No more records: the source only contains <s> and </s>, or the target is empty.
                        break;
                    }

                    // Extract features from the pair and convert it into a sequence pair.
                    var seq = featurizer.ExtractFeatures(sentPair);
                    if (seq.tgtSequence.SetLabel(sentPair.tgtSentence, featurizer.TagSet))
                    {
                        dataSet.SequenceList.Add(seq);

                        // Report progress every progressInterval accepted records.
                        recordCount++;
                        if (recordCount % progressInterval == 0)
                        {
                            Logger.WriteLine("{0}...", recordCount);
                        }
                    }
                }
            }
        }
 public Result(SentencePair sentencePair, IEnumerable <(string fragment, bool highlight)> renderedHighlights, double similarity)