Exemplo n.º 1
0
        /// <summary>
        /// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
        /// </summary>
        /// <remarks>
        /// The maxent model, context generator, end-of-sentence scanner and the
        /// <c>UseTokenEnd</c> flag are taken from the model. When the model carries an
        /// abbreviation dictionary, the string comparison mode is derived from its case
        /// sensitivity and every abbreviation token is indexed together with its length.
        /// </remarks>
        /// <param name="sentenceModel">The sentence model.</param>
        /// <exception cref="System.ArgumentNullException">
        /// The <paramref name="sentenceModel"/> is <c>null</c>.
        /// </exception>
        public SentenceDetectorME(SentenceModel sentenceModel)
        {
            if (sentenceModel == null)
            {
                throw new ArgumentNullException(nameof(sentenceModel));
            }

            model       = sentenceModel.MaxentModel;
            cgen        = sentenceModel.Factory.GetContextGenerator();
            scanner     = sentenceModel.Factory.GetEndOfSentenceScanner();
            useTokenEnd = sentenceModel.UseTokenEnd;

            var abbreviations = sentenceModel.Abbreviations;
            if (abbreviations == null)
            {
                // No abbreviation dictionary: the abbreviation-related fields are left unset.
                return;
            }

            stringComparison = abbreviations.IsCaseSensitive
                ? StringComparison.Ordinal
                : StringComparison.OrdinalIgnoreCase;

            abbreviationTokens = new Dictionary<string, int>();

            foreach (var abbreviation in abbreviations)
            {
                foreach (var token in abbreviation.Tokens)
                {
                    // Indexer assignment instead of Add: if the same token is enumerated more
                    // than once (e.g. shared by two abbreviation entries), Add would throw an
                    // ArgumentException. The stored value is identical either way.
                    abbreviationTokens[token] = token.Length;
                }
            }
        }
Exemplo n.º 2
0
 /// <summary>
 /// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
 /// </summary>
 /// <remarks>
 /// The maxent model, context generator, end-of-sentence scanner and the
 /// <c>UseTokenEnd</c> flag are all read from <paramref name="sentenceModel"/>.
 /// </remarks>
 /// <param name="sentenceModel">The sentence model.</param>
 /// <exception cref="System.ArgumentNullException">
 /// The <paramref name="sentenceModel"/> is <c>null</c>.
 /// </exception>
 public SentenceDetectorME(SentenceModel sentenceModel)
 {
     // Fail fast with a descriptive exception instead of a NullReferenceException
     // on the first property access below.
     if (sentenceModel == null)
     {
         throw new ArgumentNullException(nameof(sentenceModel));
     }

     model       = sentenceModel.MaxentModel;
     cgen        = sentenceModel.Factory.GetContextGenerator();
     scanner     = sentenceModel.Factory.GetEndOfSentenceScanner();
     useTokenEnd = sentenceModel.UseTokenEnd;
 }
        /// <summary>
        /// Trains a sentence model with a factory built from an abbreviation dictionary and
        /// custom end-of-sentence characters, then checks that the factory settings are
        /// the expected ones both on the trained model and after a serialize/deserialize
        /// round trip through an in-memory stream.
        /// </summary>
        public void TestDefault() {

            var dic = LoadAbbDictionary();

            Assert.NotNull(dic);

            char[] eos = {'.', '?'};
            var sdModel = Train(new SentenceDetectorFactory("en", true, dic, eos));

            Assert.NotNull(sdModel);

            SentenceDetectorFactory factory = sdModel.Factory;

            Assert.NotNull(factory.AbbreviationDictionary);
            Assert.True(factory.GetContextGenerator() is DefaultSentenceContextGenerator);
            Assert.True(factory.GetEndOfSentenceScanner() is DefaultEndOfSentenceScanner);
            Assert.True(eos.SequenceEqual(factory.EOSCharacters));

            // Dispose the buffer deterministically, even when an assertion below fails.
            using (var o = new MemoryStream()) {

                // NOTE(review): UnclosableStream presumably keeps Serialize from closing the
                // underlying buffer so it can be rewound and read back - confirm.
                sdModel.Serialize(new UnclosableStream(o));

                o.Seek(0, SeekOrigin.Begin);

                var fromSerialized = new SentenceModel(o);

                factory = fromSerialized.Factory;
                Assert.NotNull(factory.AbbreviationDictionary);
                Assert.True(factory.GetContextGenerator() is DefaultSentenceContextGenerator);
                Assert.True(factory.GetEndOfSentenceScanner() is DefaultEndOfSentenceScanner);
                Assert.True(eos.SequenceEqual(factory.EOSCharacters));
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
        /// </summary>
        /// <remarks>
        /// The maxent model, context generator, end-of-sentence scanner and the
        /// <c>UseTokenEnd</c> flag are taken from the model. When the model carries an
        /// abbreviation dictionary, the string comparison mode is derived from its case
        /// sensitivity and every abbreviation token is indexed together with its length.
        /// </remarks>
        /// <param name="sentenceModel">The sentence model.</param>
        /// <exception cref="System.ArgumentNullException">
        /// The <paramref name="sentenceModel"/> is <c>null</c>.
        /// </exception>
        public SentenceDetectorME(SentenceModel sentenceModel) {
            if (sentenceModel == null) {
                throw new ArgumentNullException("sentenceModel");
            }

            model = sentenceModel.MaxentModel;
            cgen = sentenceModel.Factory.GetContextGenerator();
            scanner = sentenceModel.Factory.GetEndOfSentenceScanner();
            useTokenEnd = sentenceModel.UseTokenEnd;

            // No abbreviation dictionary: the abbreviation-related fields are left unset.
            if (sentenceModel.Abbreviations == null) {
                return;
            }

            stringComparison = sentenceModel.Abbreviations.IsCaseSensitive
                ? StringComparison.Ordinal
                : StringComparison.OrdinalIgnoreCase;

            abbreviationTokens = new Dictionary<string, int>();

            foreach (var abbreviation in sentenceModel.Abbreviations) {
                foreach (var token in abbreviation.Tokens) {
                    // Indexer assignment instead of Add: if the same token is enumerated more
                    // than once (e.g. shared by two abbreviation entries), Add would throw an
                    // ArgumentException. The stored value is identical either way.
                    abbreviationTokens[token] = token.Length;
                }
            }
        }
        /// <summary>
        /// Serializes the Java model and the C# model to temporary files, then loads each
        /// file with the other implementation and compares the resulting models.
        /// </summary>
        public void TestCrossCompatibility()
        {
            var jModel = OpenJavaModel();
            var sModel = OpenSharpModel();

            var jFile = Path.GetTempFileName();
            var sFile = Path.GetTempFileName();

            var jFileStream = OpenNLP.CreateOutputStream(jFile);

            jModel.serialize(jFileStream);
            jFileStream.close();

            // Dispose the stream explicitly so the file is flushed and released before
            // the Java side opens it below.
            using (var sFileStream = new FileStream(sFile, FileMode.Create))
            {
                sModel.Serialize(sFileStream);
            }

            // now java opens the csharp model and vice versa :)

            var jModel2 = new JavaModel(OpenNLP.CreateInputStream(sFile));
            var sModel2 = new SharpModel(jFile);

            Assert.Null(jModel2.getAbbreviations());
            Assert.Null(sModel2.Abbreviations);

            Assert.Null(jModel2.getEosCharacters());
            Assert.Null(sModel2.EosCharacters);

            Assert.AreEqual(jModel2.useTokenEnd(), sModel2.UseTokenEnd);

            var jFactory2 = jModel2.getFactory();
            var sFactory2 = sModel2.Factory;

            Assert.AreEqual(jFactory2.isUseTokenEnd(), sFactory2.UseTokenEnd);
            Assert.AreEqual(jFactory2.getLanguageCode(), sFactory2.LanguageCode);
        }
        /// <summary>
        /// Loads the sentence model stored at "/opennlp/models/en-sent.bin" and passes a
        /// detector built from it to <c>EvalSentences</c>.
        /// </summary>
        public void TestWithOpenNLPModel() {
            using (var modelStream = Tests.OpenFile("/opennlp/models/en-sent.bin")) {
                var sentenceModel = new SentenceModel(modelStream);
                var detector = new SentenceDetectorME(sentenceModel);

                EvalSentences(detector);
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Serializes the Java model and the C# model to temporary files, then loads each
        /// file with the other implementation and compares the resulting models.
        /// </summary>
        public void TestCrossCompatibility() {
            var jModel = OpenJavaModel();
            var sModel = OpenSharpModel();

            var jFile = Path.GetTempFileName();
            var sFile = Path.GetTempFileName();

            var jFileStream = OpenNLP.CreateOutputStream(jFile);
            jModel.serialize(jFileStream);
            jFileStream.close();

            // Dispose the stream explicitly so the file is flushed and released before
            // the Java side opens it below.
            using (var sFileStream = new FileStream(sFile, FileMode.Create)) {
                sModel.Serialize(sFileStream);
            }

            // now java opens the csharp model and vice versa :)

            var jModel2 = new JavaModel(OpenNLP.CreateInputStream(sFile));
            var sModel2 = new SharpModel(jFile);

            Assert.Null(jModel2.getAbbreviations());
            Assert.Null(sModel2.Abbreviations);

            Assert.Null(jModel2.getEosCharacters());
            Assert.Null(sModel2.EosCharacters);

            Assert.AreEqual(jModel2.useTokenEnd(), sModel2.UseTokenEnd);

            var jFactory2 = jModel2.getFactory();
            var sFactory2 = sModel2.Factory;

            Assert.AreEqual(jFactory2.isUseTokenEnd(), sFactory2.UseTokenEnd);
            Assert.AreEqual(jFactory2.getLanguageCode(), sFactory2.LanguageCode);
        }