// Trains an English sentence detector from the Sentences.txt test resource, evaluates it
// with the SharpNL implementation, then serializes the model to a temporary file and
// checks that the Java OpenNLP implementation can load and evaluate the very same model.
public void TestEverything() {
    using (var file = Tests.OpenFile("/opennlp/tools/sentdetect/Sentences.txt")) {
        var mlParams = new TrainingParameters();
        mlParams.Set(Parameters.Iterations, "100");
        mlParams.Set(Parameters.Cutoff, "0");

        var sdFactory = new SentenceDetectorFactory("en", true, null, null);
        var stream = new SentenceSampleStream(new PlainTextByLineStream(file));

        var model = SentenceDetectorME.Train("en", stream, sdFactory, mlParams);

        Assert.AreEqual("en", model.Language);
        // Expected value goes first so a failure message reads correctly.
        Assert.AreEqual(true, model.UseTokenEnd);

        var sMe = new SentenceDetectorME(model);

        // test the SharpNL sentences
        SentenceDetectorMETest.EvalSentences(sMe);

        var sFile = Path.GetTempFileName();

        // Dispose the stream before the Java side opens the file; otherwise the handle
        // leaks and the file may still be open for writing when it is read back.
        using (var output = new FileStream(sFile, FileMode.Create)) {
            model.Serialize(output);
        }

        var jModel2 = new JavaModel(OpenNLP.CreateInputStream(sFile));
        var jMe = new JavaSDME(jModel2);

        // test the Java OpenNLP sentences.
        JavaEvalSentences(jMe);
    }
}
// Trains a tokenizer model from the token.train test resource, runs the SharpNL tokenizer
// checks, then serializes the model to a temporary file, loads it with the Java OpenNLP
// tokenizer and verifies that both implementations report the same token probabilities
// (within a small tolerance).
public void TestCrossCompatibility() {
    using (var data = Tests.OpenFile("/opennlp/tools/tokenize/token.train")) {
        var samples = new TokenSampleStream(new PlainTextByLineStream(data));

        var mlParams = new TrainingParameters();
        mlParams.Set(Parameters.Iterations, "100");
        mlParams.Set(Parameters.Cutoff, "0");

        var model = TokenizerME.Train(samples, new TokenizerFactory("en", null, true), mlParams);

        var sMe = new TokenizerME(model);
        TokenizerMETest.TestTokenizer(sMe);

        // Captured right after the SharpNL run so it can be compared with the Java run below.
        var sProbs = sMe.TokenProbabilities;

        // --- java \/

        var sFile = Path.GetTempFileName();

        // Dispose the stream before the Java side opens the file; otherwise the handle
        // leaks and the file may still be open for writing when it is read back.
        using (var output = new FileStream(sFile, FileMode.Create)) {
            model.Serialize(output);
        }

        var jModel = new opennlp.tools.tokenize.TokenizerModel(OpenNLP.CreateInputStream(sFile));
        var jMe = new opennlp.tools.tokenize.TokenizerME(jModel);

        TestJavaTokenizer(jMe);

        var jProbs = jMe.getTokenProbabilities();

        Assert.AreEqual(jProbs.Length, sProbs.Length);

        for (int i = 0; i < jProbs.Length; i++) {
            // One observed difference between the two implementations was
            // -0.00000000000000011102230246251565, which is insignificant,
            // hence the comparison with a tolerance instead of exact equality.
            Assert.AreEqual(jProbs[i], sProbs[i], 0.0000000001d);
        }
    }
}
// Serializes a Java model and a SharpNL model to temporary files, then loads each file with
// the *other* implementation and checks that the abbreviations, EOS characters, token-end
// flag and factory settings agree between the two.
public void TestCrossCompatibility() {
    var jModel = OpenJavaModel();
    var sModel = OpenSharpModel();

    var jFile = Path.GetTempFileName();
    var sFile = Path.GetTempFileName();

    var jFileStream = OpenNLP.CreateOutputStream(jFile);
    jModel.serialize(jFileStream);
    jFileStream.close();

    // Dispose the stream before the Java side opens the file; otherwise the handle
    // leaks and the file may still be open for writing when it is read back.
    using (var sFileStream = new FileStream(sFile, FileMode.Create)) {
        sModel.Serialize(sFileStream);
    }

    // now java opens the csharp model and vice versa :)

    var jModel2 = new JavaModel(OpenNLP.CreateInputStream(sFile));
    var sModel2 = new SharpModel(jFile);

    Assert.Null(jModel2.getAbbreviations());
    Assert.Null(sModel2.Abbreviations);

    Assert.Null(jModel2.getEosCharacters());
    Assert.Null(sModel2.EosCharacters);

    Assert.AreEqual(jModel2.useTokenEnd(), sModel2.UseTokenEnd);

    var jFactory2 = jModel2.getFactory();
    var sFactory2 = sModel2.Factory;

    Assert.AreEqual(jFactory2.isUseTokenEnd(), sFactory2.UseTokenEnd);
    Assert.AreEqual(jFactory2.getLanguageCode(), sFactory2.LanguageCode);
}