/// <summary>
/// Feeds two blank-line separated sentences (three "word tag pred" rows each)
/// through a <c>ChunkSampleStream</c> and checks that each sentence is returned
/// as its own sample, with the columns split into Sentence, Tags and Preds,
/// and that the stream yields null once the input is exhausted.
/// </summary>
public void TestReadingEvents()
{
    // One token per line; the empty line separates the two sentences.
    const string input =
        "word11 tag11 pred11\n" +
        "word12 tag12 pred12\n" +
        "word13 tag13 pred13\n" +
        "\n" +
        "word21 tag21 pred21\n" +
        "word22 tag22 pred22\n" +
        "word23 tag23 pred23\n";

    // Expected rows, each laid out as { word, tag, pred }.
    string[][] expectedFirst =
    {
        new[] { "word11", "tag11", "pred11" },
        new[] { "word12", "tag12", "pred12" },
        new[] { "word13", "tag13", "pred13" }
    };
    string[][] expectedSecond =
    {
        new[] { "word21", "tag21", "pred21" },
        new[] { "word22", "tag22", "pred22" },
        new[] { "word23", "tag23", "pred23" }
    };

    byte[] inputBytes = Encoding.UTF8.GetBytes(input);
    var lineStream = new PlainTextByLineStream(new MemoryStream(inputBytes));
    var chunkStream = new ChunkSampleStream(lineStream);

    // First sentence.
    var firstSample = chunkStream.Read();
    for (int i = 0; i < expectedFirst.Length; i++)
    {
        Assert.AreEqual(expectedFirst[i][0], firstSample.Sentence[i]);
        Assert.AreEqual(expectedFirst[i][1], firstSample.Tags[i]);
        Assert.AreEqual(expectedFirst[i][2], firstSample.Preds[i]);
    }

    // Second sentence.
    ChunkSample secondSample = chunkStream.Read();
    for (int i = 0; i < expectedSecond.Length; i++)
    {
        Assert.AreEqual(expectedSecond[i][0], secondSample.Sentence[i]);
        Assert.AreEqual(expectedSecond[i][1], secondSample.Tags[i]);
        Assert.AreEqual(expectedSecond[i][2], secondSample.Preds[i]);
    }

    // Nothing left to read.
    Assert.Null(chunkStream.Read());
}
/// <summary>
/// Trains a chunker model from the training data file at <paramref name="path"/>.
/// </summary>
/// <param name="path">Path of the training data file; it is opened read-only.</param>
/// <returns>The model returned by <c>ChunkerME.Train</c>.</returns>
/// <remarks>
/// Training runs with the iterations parameter set to "70" and the cutoff set to "1".
/// The file is closed before this method returns, including when training throws.
/// </remarks>
public static ChunkerModel TrainModel(string path)
{
    // Dispose the file handle deterministically; previously it was left open
    // until the finalizer ran (and leaked outright if training failed).
    using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        ChunkSampleStream stream = new ChunkSampleStream(new PlainTextByLineStream(fs));

        TrainingParameters trainParams = new TrainingParameters();
        trainParams.Set(Parameters.Iterations, "70");
        trainParams.Set(Parameters.Cutoff, "1");

        // The using block spans the whole Train call, so the file stays open
        // for as long as training reads from the stream.
        return ChunkerME.Train(TRAINING_LANGUAGE, stream, trainParams, new ChunkerFactory());
    }
}