Example #1
        // Trains a sentence-detector model with SharpNL, evaluates it, then serializes it
        // and re-opens the serialized file with the Java OpenNLP classes to check compatibility.
        public void TestEverything()
        {
            using (var file = Tests.OpenFile("/opennlp/tools/sentdetect/Sentences.txt")) {
                var mlParams = new TrainingParameters();

                mlParams.Set(Parameters.Iterations, "100");
                mlParams.Set(Parameters.Cutoff, "0");

                var sdFactory = new SentenceDetectorFactory("en", true, null, null);
                var stream    = new SentenceSampleStream(new PlainTextByLineStream(file));

                var model = SentenceDetectorME.Train("en", stream, sdFactory, mlParams);

                Assert.AreEqual("en", model.Language);
                Assert.AreEqual(true, model.UseTokenEnd);

                var sMe = new SentenceDetectorME(model);

                // test the SharpNL sentences
                SentenceDetectorMETest.EvalSentences(sMe);

                var sFile = Path.GetTempFileName();

                model.Serialize(new FileStream(sFile, FileMode.Create));

                var jModel2 = new JavaModel(OpenNLP.CreateInputStream(sFile));

                var jMe = new JavaSDME(jModel2);

                // test the Java OpenNLP sentences.
                JavaEvalSentences(jMe);

                // first try?! Yes! ;-)
            }
        }
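A rough usage sketch, not part of the test above: once the SentenceModel has been trained (or deserialized), the detector can be applied to raw text. This assumes the same context as the test; the member name SentDetect and the sample input are assumptions based on SharpNL mirroring OpenNLP's sentDetect API, not something the test itself shows.

        // Hedged sketch: splitting raw text with the trained model.
        // SentDetect is assumed to mirror OpenNLP's sentDetect(); the input string is illustrative only.
        var detector  = new SentenceDetectorME(model);
        var sentences = detector.SentDetect("First sentence. Second sentence.");
        foreach (var sentence in sentences)
            Console.WriteLine(sentence);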
Example #2
 // Opens the named file as a Java OpenNLP parser model, closing the input stream even if the constructor throws.
 private static opennlp.tools.parser.ParserModel OpenJavaModel(string fileName)
 {
     java.io.FileInputStream inputStream = null;
     try {
         inputStream = OpenNLP.OpenInputStream(fileName);
         return(new opennlp.tools.parser.ParserModel(inputStream));
     } finally {
         inputStream?.close();
     }
 }
Example #3
        private static opennlp.tools.chunker.ChunkSampleStream JavaSampleStream()
        {
            return(new opennlp.tools.chunker.ChunkSampleStream(
                       // suppress the obsolete-member warning (CS0612) raised by the deprecated Java constructor
#pragma warning disable 612
                       new opennlp.tools.util.PlainTextByLineStream(
                           OpenNLP.OpenInputStream("opennlp/tools/chunker/test.txt"), "utf-8")));

#pragma warning restore 612
        }
Example #4
 //
 // DO NOT USE THESE TESTS AS SAMPLES TO BUILD YOUR STUFF!
 //
 //  I use some things here that are not needed in a "real" implementation
 //
 private static opennlp.tools.tokenize.TokenizerModel OpenJavaModel(string fileName)
 {
     java.io.FileInputStream inputStream = null;
     try {
         inputStream = OpenNLP.OpenInputStream(fileName);
         return(new opennlp.tools.tokenize.TokenizerModel(inputStream));
     } finally {
         if (inputStream != null)
         {
             inputStream.close();
         }
     }
 }
Example #5
        // Trains a tokenizer model with SharpNL, then loads the serialized model with the
        // Java OpenNLP TokenizerME and compares the token probabilities from both sides.
        public void TestCrossCompatibility()
        {
            using (var data = Tests.OpenFile("/opennlp/tools/tokenize/token.train")) {
                var samples  = new TokenSampleStream(new PlainTextByLineStream(data));
                var mlParams = new TrainingParameters();
                mlParams.Set(Parameters.Iterations, "100");
                mlParams.Set(Parameters.Cutoff, "0");
                var model = TokenizerME.Train(samples, new TokenizerFactory("en", null, true), mlParams);

                var sMe = new TokenizerME(model);

                TokenizerMETest.TestTokenizer(sMe);

                var sProbs = sMe.TokenProbabilities;

                // --- Java (OpenNLP) side below

                var sFile = Path.GetTempFileName();

                model.Serialize(new FileStream(sFile, FileMode.Create));

                var jModel = new opennlp.tools.tokenize.TokenizerModel(
                    OpenNLP.CreateInputStream(sFile)
                    );

                var jMe = new opennlp.tools.tokenize.TokenizerME(jModel);

                TestJavaTokenizer(jMe);

                var jProbs = jMe.getTokenProbabilities();

                Assert.AreEqual(jProbs.Length, sProbs.Length);

                for (int i = 0; i < jProbs.Length; i++)
                {
                    // one difference :(
                    // -0.00000000000000011102230246251565
                    //
                    // but still "insignificant" :)
                    Assert.AreEqual(jProbs[i], sProbs[i], 0.0000000001d);
                }
            }
        }
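A similar hedged sketch for the tokenizer: Tokenize is assumed to mirror OpenNLP's tokenize(), while TokenProbabilities is the same property the test reads above; the input string is illustrative only.

        // Hedged sketch: tokenizing a sentence with the trained model.
        // Tokenize is assumed to mirror OpenNLP's tokenize(); not taken from the test itself.
        var tokenizer = new TokenizerME(model);
        var tokens    = tokenizer.Tokenize("SharpNL and OpenNLP should agree on these tokens.");
        var probs     = tokenizer.TokenProbabilities;
        for (int i = 0; i < tokens.Length; i++)
            Console.WriteLine($"{tokens[i]}: {probs[i]:F6}");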
Example #6
        // Serializes the Java-loaded and SharpNL-loaded sentence models, then re-opens each
        // file with the other library and compares the deserialized model properties.
        public void TestCrossCompatibility()
        {
            var jModel = OpenJavaModel();
            var sModel = OpenSharpModel();

            var jFile = Path.GetTempFileName();
            var sFile = Path.GetTempFileName();

            var jFileStream = OpenNLP.CreateOutputStream(jFile);

            jModel.serialize(jFileStream);
            jFileStream.close();

            sModel.Serialize(new FileStream(sFile, FileMode.Create));

            // now java opens the csharp model and vice versa :)

            var jModel2 = new JavaModel(OpenNLP.CreateInputStream(sFile));
            var sModel2 = new SharpModel(jFile);

            Assert.Null(jModel2.getAbbreviations());
            Assert.Null(sModel2.Abbreviations);

            Assert.Null(jModel2.getEosCharacters());
            Assert.Null(sModel2.EosCharacters);

            Assert.AreEqual(jModel2.useTokenEnd(), sModel2.UseTokenEnd);

            var jFactory2 = jModel2.getFactory();
            var sFactory2 = sModel2.Factory;

            Assert.AreEqual(jFactory2.isUseTokenEnd(), sFactory2.UseTokenEnd);
            Assert.AreEqual(jFactory2.getLanguageCode(), sFactory2.LanguageCode);

            Assert.True(true);
        }
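Note that these tests hand a new FileStream to Serialize without disposing it. Below is a minimal sketch of the same step with the stream wrapped in a using block; whether Serialize leaves the stream open is not shown here, so the explicit disposal is a defensive assumption rather than documented behavior.

            // Sketch: serializing the SharpNL model with the output stream disposed deterministically.
            using (var output = new FileStream(sFile, FileMode.Create))
            {
                sModel.Serialize(output);
            }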
Example #7
 private static JavaModel OpenJavaModel()
 {
     return(new JavaModel(OpenNLP.OpenInputStream("opennlp/models/en-sent.bin")));
 }
Example #8
 private static opennlp.tools.chunker.ChunkSampleStream JavaSampleStream()
 {
     return(new opennlp.tools.chunker.ChunkSampleStream(
                new opennlp.tools.util.PlainTextByLineStream(
                    OpenNLP.OpenInputStream("opennlp/tools/chunker/test.txt"), "utf-8")));
 }
Example #9
 private static JavaModel OpenJavaModel()
 {
     return(new JavaModel(OpenNLP.OpenInputStream(ModeFile)));
 }