Пример #1
0
        // Checks that ChunkSampleStream turns blank-line-separated input into samples:
        // two three-token sentences are read back column by column, after which
        // Read() is expected to return null.
        public void TestReadingEvents()
        {
            // One "word tag pred" triple per line; the empty entry becomes the blank
            // line that starts the next sample sentence.
            var inputLines = new[]
            {
                "word11 tag11 pred11",
                "word12 tag12 pred12",
                "word13 tag13 pred13",
                "",
                "word21 tag21 pred21",
                "word22 tag22 pred22",
                "word23 tag23 pred23"
            };

            // Every line, including the last one, is terminated by '\n'.
            var text = string.Join("\n", inputLines) + "\n";
            var bytes = Encoding.UTF8.GetBytes(text);

            var chunkStream = new ChunkSampleStream(new PlainTextByLineStream(new MemoryStream(bytes)));

            // Expected values per column, indexed as [sample][token].
            var expectedWords = new[]
            {
                new[] { "word11", "word12", "word13" },
                new[] { "word21", "word22", "word23" }
            };
            var expectedTags = new[]
            {
                new[] { "tag11", "tag12", "tag13" },
                new[] { "tag21", "tag22", "tag23" }
            };
            var expectedPreds = new[]
            {
                new[] { "pred11", "pred12", "pred13" },
                new[] { "pred21", "pred22", "pred23" }
            };

            for (var sampleIndex = 0; sampleIndex < expectedWords.Length; sampleIndex++)
            {
                // Each Read() call yields the next sample from the stream.
                ChunkSample current = chunkStream.Read();

                for (var token = 0; token < expectedWords[sampleIndex].Length; token++)
                {
                    Assert.AreEqual(expectedWords[sampleIndex][token], current.Sentence[token]);
                    Assert.AreEqual(expectedTags[sampleIndex][token], current.Tags[token]);
                    Assert.AreEqual(expectedPreds[sampleIndex][token], current.Preds[token]);
                }
            }

            // Nothing is left after the second sample.
            Assert.Null(chunkStream.Read());
        }
Пример #2
0
        // Round-trips two sentences through ChunkSampleStream and checks that each
        // "word tag pred" line comes back in the Sentence, Tags and Preds columns of
        // the matching sample, and that a third Read() returns null.
        public void TestReadingEvents() {
            // Input lines grouped by sample sentence; each line holds three
            // space-separated fields.
            string[][] sentences = {
                new[] { "word11 tag11 pred11", "word12 tag12 pred12", "word13 tag13 pred13" },
                new[] { "word21 tag21 pred21", "word22 tag22 pred22", "word23 tag23 pred23" }
            };

            var sample = new StringBuilder();
            for (var i = 0; i < sentences.Length; i++) {
                if (i > 0) {
                    // An empty line starts the next sample sentence.
                    sample.Append('\n');
                }

                foreach (var line in sentences[i]) {
                    sample.Append(line).Append('\n');
                }
            }

            var rawInput = new MemoryStream(Encoding.UTF8.GetBytes(sample.ToString()));
            var stringStream = new PlainTextByLineStream(rawInput);
            var chunkStream = new ChunkSampleStream(stringStream);

            foreach (var sentence in sentences) {
                ChunkSample read = chunkStream.Read();

                for (var position = 0; position < sentence.Length; position++) {
                    // fields[0] = word, fields[1] = tag, fields[2] = pred of the input line.
                    var fields = sentence[position].Split(' ');

                    Assert.AreEqual(fields[0], read.Sentence[position]);
                    Assert.AreEqual(fields[1], read.Tags[position]);
                    Assert.AreEqual(fields[2], read.Preds[position]);
                }
            }

            // The stream holds exactly two samples.
            Assert.Null(chunkStream.Read());
        }
Пример #3
0
        /// <summary>
        /// Trains a chunker model from the chunk samples stored in the file at
        /// <paramref name="path"/>, using 70 iterations and a cutoff of 1.
        /// </summary>
        /// <param name="path">Path of the training data file; it is opened read-only.</param>
        /// <returns>The model returned by <c>ChunkerME.Train</c>.</returns>
        public static ChunkerModel TrainModel(string path)
        {
            // The using block releases the file handle as soon as training returns or
            // throws; previously the FileStream was never disposed.
            // NOTE(review): assumes ChunkerME.Train finishes reading the stream before
            // it returns, so the model does not need the file afterwards - confirm.
            using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                ChunkSampleStream stream = new ChunkSampleStream(new PlainTextByLineStream(fs));

                TrainingParameters trainParams = new TrainingParameters();
                trainParams.Set(Parameters.Iterations, "70");
                trainParams.Set(Parameters.Cutoff, "1");

                return ChunkerME.Train(TRAINING_LANGUAGE, stream, trainParams, new ChunkerFactory());
            }
        }