/// <summary>
/// Reads source/target sentence pairs from a corpus file, converts each pair into a
/// feature sequence and appends the successfully labeled sequences to
/// <paramref name="dataSet"/>.
/// </summary>
/// <param name="strFileName">Path of the corpus file to read.</param>
/// <param name="featurizer">
/// Configuration used to extract features from each sentence pair; its <c>TagSet</c>
/// is passed to <c>SetLabel</c> when labeling the target sequence.
/// </param>
/// <param name="dataSet">Data set whose <c>SequenceList</c> receives the loaded sequences.</param>
/// <remarks>
/// Records are consumed two at a time: the first becomes the source sentence and the
/// second the target sentence. Reading stops at the first pair whose source sentence
/// has at most two tokens or whose target sentence has no tokens.
/// </remarks>
private static void LoadSeq2SeqDataSet(string strFileName, Config featurizer, DataSet <SequencePair> dataSet)
{
    Logger.WriteLine("Loading data set for seq2seq2 training...");

    // The using statement guarantees the reader is released even when record parsing,
    // feature extraction or labeling throws part-way through the file.
    using (var sr = new StreamReader(strFileName))
    {
        var recordCount = 0;

        while (true)
        {
            // Initializer order matters: the source record is read before the target record.
            var sentPair = new SentencePair
            {
                srcSentence = new Sentence(ReadRecord(sr)),
                tgtSentence = new Sentence(ReadRecord(sr), false)
            };

            // A source sentence with at most two tokens is treated as end of data
            // (per the original note, it then holds only <s> and </s>), as is an
            // empty target sentence.
            if (sentPair.srcSentence.TokensList.Count <= 2 || sentPair.tgtSentence.TokensList.Count <= 0)
            {
                break;
            }

            // Extract features from the pair and convert it into a sequence.
            var seq = featurizer.ExtractFeatures(sentPair);

            // Pairs whose target sequence cannot be labeled are dropped and not counted.
            if (!seq.tgtSequence.SetLabel(sentPair.tgtSentence, featurizer.TagSet))
            {
                continue;
            }

            dataSet.SequenceList.Add(seq);

            // Report progress every 10000 accepted records.
            recordCount++;
            if (recordCount % 10000 == 0)
            {
                Logger.WriteLine("{0}...", recordCount);
            }
        }
    }
}
public Result(SentencePair sentencePair, IEnumerable <(string fragment, bool highlight)> renderedHighlights, double similarity)