/// <summary>
/// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
/// </summary>
/// <param name="sentenceModel">The sentence model.</param>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="sentenceModel"/> is <c>null</c>.
/// </exception>
/// <remarks>
/// The maxent model, context generator, end-of-sentence scanner and the token-end flag are
/// all taken from <paramref name="sentenceModel"/>. When the model exposes no abbreviations,
/// the constructor returns without assigning the abbreviation token lookup or the string comparison.
/// </remarks>
public SentenceDetectorME(SentenceModel sentenceModel) {
    if (sentenceModel == null) {
        throw new ArgumentNullException(nameof(sentenceModel));
    }

    model = sentenceModel.MaxentModel;
    cgen = sentenceModel.Factory.GetContextGenerator();
    scanner = sentenceModel.Factory.GetEndOfSentenceScanner();
    useTokenEnd = sentenceModel.UseTokenEnd;

    var abbreviations = sentenceModel.Abbreviations;
    if (abbreviations == null) {
        return;
    }

    stringComparison = abbreviations.IsCaseSensitive
        ? StringComparison.Ordinal
        : StringComparison.OrdinalIgnoreCase;

    abbreviationTokens = new Dictionary<string, int>();
    foreach (var abbreviation in abbreviations) {
        foreach (var token in abbreviation.Tokens) {
            // Indexer assignment instead of Add: the same token can occur in more than one
            // abbreviation entry, and Add would throw ArgumentException on the second
            // occurrence. The stored length is identical for a repeated token.
            abbreviationTokens[token] = token.Length;
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
/// </summary>
/// <param name="sentenceModel">
/// The sentence model that supplies the maxent model, the context generator,
/// the end-of-sentence scanner and the token-end flag.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="sentenceModel"/> is <c>null</c>.
/// </exception>
public SentenceDetectorME(SentenceModel sentenceModel) {
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // raised by the first property access below.
    if (sentenceModel == null) {
        throw new System.ArgumentNullException(nameof(sentenceModel));
    }

    model = sentenceModel.MaxentModel;
    cgen = sentenceModel.Factory.GetContextGenerator();
    scanner = sentenceModel.Factory.GetEndOfSentenceScanner();
    useTokenEnd = sentenceModel.UseTokenEnd;
}
/// <summary>
/// Trains a sentence model from a factory built with an abbreviation dictionary and custom
/// end-of-sentence characters, then checks that the factory settings are the same on the
/// trained model and on a copy restored from its serialized form.
/// </summary>
public void TestDefault() {
    var abbreviations = LoadAbbDictionary();
    Assert.NotNull(abbreviations);

    var eosChars = new[] { '.', '?' };

    var trainedModel = Train(new SentenceDetectorFactory("en", true, abbreviations, eosChars));
    Assert.NotNull(trainedModel);

    // Factory as exposed by the freshly trained model.
    SentenceDetectorFactory trainedFactory = trainedModel.Factory;
    Assert.NotNull(trainedFactory.AbbreviationDictionary);

    var trainedContextGenerator = trainedFactory.GetContextGenerator();
    Assert.True(trainedContextGenerator is DefaultSentenceContextGenerator);

    var trainedScanner = trainedFactory.GetEndOfSentenceScanner();
    Assert.True(trainedScanner is DefaultEndOfSentenceScanner);

    Assert.True(eosChars.SequenceEqual(trainedFactory.EOSCharacters));

    // Round-trip through an in-memory buffer. The buffer is wrapped in UnclosableStream,
    // presumably so that Serialize cannot close it before it is read back (confirm).
    var buffer = new MemoryStream();
    trainedModel.Serialize(new UnclosableStream(buffer));
    buffer.Position = 0;

    var restoredModel = new SentenceModel(buffer);

    // The restored factory must pass exactly the same checks.
    SentenceDetectorFactory restoredFactory = restoredModel.Factory;
    Assert.NotNull(restoredFactory.AbbreviationDictionary);

    var restoredContextGenerator = restoredFactory.GetContextGenerator();
    Assert.True(restoredContextGenerator is DefaultSentenceContextGenerator);

    var restoredScanner = restoredFactory.GetEndOfSentenceScanner();
    Assert.True(restoredScanner is DefaultEndOfSentenceScanner);

    Assert.True(eosChars.SequenceEqual(restoredFactory.EOSCharacters));
}
/// <summary>
/// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
/// </summary>
/// <param name="sentenceModel">The sentence model.</param>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="sentenceModel"/> is <c>null</c>.
/// </exception>
/// <remarks>
/// If the model exposes no abbreviations, the abbreviation token lookup and the
/// string comparison are not assigned by this constructor.
/// </remarks>
public SentenceDetectorME(SentenceModel sentenceModel) {
    if (sentenceModel == null) {
        throw new ArgumentNullException(nameof(sentenceModel));
    }

    model = sentenceModel.MaxentModel;
    cgen = sentenceModel.Factory.GetContextGenerator();
    scanner = sentenceModel.Factory.GetEndOfSentenceScanner();
    useTokenEnd = sentenceModel.UseTokenEnd;

    if (sentenceModel.Abbreviations == null) {
        return;
    }

    stringComparison = sentenceModel.Abbreviations.IsCaseSensitive
        ? StringComparison.Ordinal
        : StringComparison.OrdinalIgnoreCase;

    abbreviationTokens = new Dictionary<string, int>();
    foreach (var abbreviation in sentenceModel.Abbreviations) {
        foreach (var token in abbreviation.Tokens) {
            // A token shared by several abbreviation entries would make Dictionary.Add throw
            // ArgumentException; assigning through the indexer stores the same length and
            // tolerates the repeat.
            abbreviationTokens[token] = token.Length;
        }
    }
}
/// <summary>
/// Writes a model from the Java implementation and one from the C# implementation to
/// temporary files, loads each file with the other implementation, and compares the
/// properties exposed by the two reloaded models and their factories.
/// </summary>
public void TestCrossCompatibility() {
    var javaOriginal = OpenJavaModel();
    var sharpOriginal = OpenSharpModel();

    var javaPath = Path.GetTempFileName();
    var sharpPath = Path.GetTempFileName();

    // Java side: serialize and close the output stream explicitly.
    var javaOutput = OpenNLP.CreateOutputStream(javaPath);
    javaOriginal.serialize(javaOutput);
    javaOutput.close();

    // C# side: serialize into a newly created file.
    var sharpOutput = new FileStream(sharpPath, FileMode.Create);
    sharpOriginal.Serialize(sharpOutput);

    // now java opens the csharp model and vice versa :)
    var sharpFileAsJavaInput = OpenNLP.CreateInputStream(sharpPath);
    var javaReloaded = new JavaModel(sharpFileAsJavaInput);
    var sharpReloaded = new SharpModel(javaPath);

    Assert.Null(javaReloaded.getAbbreviations());
    Assert.Null(sharpReloaded.Abbreviations);

    Assert.Null(javaReloaded.getEosCharacters());
    Assert.Null(sharpReloaded.EosCharacters);

    var javaUseTokenEnd = javaReloaded.useTokenEnd();
    Assert.AreEqual(javaUseTokenEnd, sharpReloaded.UseTokenEnd);

    var javaFactory = javaReloaded.getFactory();
    var sharpFactory = sharpReloaded.Factory;

    var javaFactoryUseTokenEnd = javaFactory.isUseTokenEnd();
    Assert.AreEqual(javaFactoryUseTokenEnd, sharpFactory.UseTokenEnd);

    var javaLanguageCode = javaFactory.getLanguageCode();
    Assert.AreEqual(javaLanguageCode, sharpFactory.LanguageCode);

    Assert.True(true);
}
/// <summary>
/// Loads the sentence model stored at "/opennlp/models/en-sent.bin" and passes a
/// detector built from it to <c>EvalSentences</c>.
/// </summary>
public void TestWithOpenNLPModel() {
    using (var modelStream = Tests.OpenFile("/opennlp/models/en-sent.bin")) {
        var sentenceModel = new SentenceModel(modelStream);
        var detector = new SentenceDetectorME(sentenceModel);

        EvalSentences(detector);
    }
}
/// <summary>
/// Cross-implementation round trip: the Java model is serialized and reopened by the C#
/// model type, the C# model is serialized and reopened by the Java model type, and the
/// properties exposed by both reloaded models and their factories are compared.
/// </summary>
public void TestCrossCompatibility() {
    var sourceJavaModel = OpenJavaModel();
    var sourceSharpModel = OpenSharpModel();

    var javaModelFile = Path.GetTempFileName();
    var sharpModelFile = Path.GetTempFileName();

    // Persist the Java model; its output stream is closed by hand.
    var javaStream = OpenNLP.CreateOutputStream(javaModelFile);
    sourceJavaModel.serialize(javaStream);
    javaStream.close();

    // Persist the C# model.
    var sharpStream = new FileStream(sharpModelFile, FileMode.Create);
    sourceSharpModel.Serialize(sharpStream);

    // now java opens the csharp model and vice versa :)
    var javaInput = OpenNLP.CreateInputStream(sharpModelFile);
    var javaFromSharp = new JavaModel(javaInput);
    var sharpFromJava = new SharpModel(javaModelFile);

    // Neither reloaded model is expected to expose abbreviations or EOS characters.
    Assert.Null(javaFromSharp.getAbbreviations());
    Assert.Null(sharpFromJava.Abbreviations);
    Assert.Null(javaFromSharp.getEosCharacters());
    Assert.Null(sharpFromJava.EosCharacters);

    // Model-level flag must agree.
    var expectedUseTokenEnd = javaFromSharp.useTokenEnd();
    Assert.AreEqual(expectedUseTokenEnd, sharpFromJava.UseTokenEnd);

    // Factory-level settings must agree as well.
    var javaSideFactory = javaFromSharp.getFactory();
    var sharpSideFactory = sharpFromJava.Factory;

    var expectedFactoryUseTokenEnd = javaSideFactory.isUseTokenEnd();
    Assert.AreEqual(expectedFactoryUseTokenEnd, sharpSideFactory.UseTokenEnd);

    var expectedLanguageCode = javaSideFactory.getLanguageCode();
    Assert.AreEqual(expectedLanguageCode, sharpSideFactory.LanguageCode);

    Assert.True(true);
}