Example #1
0
        // Trains a name finder on OnlyWithNamesWithTypes.train and checks that the first three
        // spans found in a sentence consisting only of names are two-token "person" spans.
        public void TestOnlyWithNamesWithTypes()
        {
            using (var file = Tests.OpenFile("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")) {
                var sampleStream = new NameSampleStream(new PlainTextByLineStream(file));

                // Small iteration count and cutoff of 1 so the tiny corpus still yields features.
                var param = new TrainingParameters();
                param.Set(Parameters.Iterations, "70");
                param.Set(Parameters.Cutoff, "1");

                var model = NameFinderME.Train(
                    "en",
                    sampleStream,
                    param,
                    new TokenNameFinderFactory(null, new Dictionary<string, object>()));

                var nameFinder = new NameFinderME(model);

                // now test if it can detect the sample sentences
                var sentence = WhitespaceTokenizer.Instance.Tokenize(
                    "Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman Robert Aderholt " +
                    "Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander");

                var names = nameFinder.Find(sentence);

                Assert.AreEqual(new Span(0, 2, "person"), names[0]);
                Assert.AreEqual(new Span(2, 4, "person"), names[1]);
                Assert.AreEqual(new Span(4, 6, "person"), names[2]);

                // State the negative expectation directly instead of Assert.True(!...),
                // matching the sibling TestOnlyWithEntitiesWithTypes test.
                Assert.False(HasOtherAsOutcome(model));
            }
        }
Example #2
0
        // Trains a name finder on OnlyWithEntitiesWithTypes.train and checks the first three
        // typed spans found in "NATO United States Barack Obama".
        public void TestOnlyWithEntitiesWithTypes()
        {
            using (var trainingFile = Tests.OpenFile("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")) {
                var lines = new PlainTextByLineStream(trainingFile);
                var samples = new NameSampleStream(lines);

                var trainingParams = new TrainingParameters();
                trainingParams.Set(Parameters.Iterations, "70");
                trainingParams.Set(Parameters.Cutoff, "1");

                var factory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
                var trainedModel = NameFinderME.Train("en", samples, trainingParams, factory);
                var finder = new NameFinderME(trainedModel);

                // Run the trained finder over a sentence made up of entities only.
                var tokens = WhitespaceTokenizer.Instance.Tokenize("NATO United States Barack Obama");
                var found = finder.Find(tokens);

                Assert.AreEqual(new Span(0, 1, "organization"), found[0]);
                Assert.AreEqual(new Span(1, 3, "location"), found[1]);
                Assert.AreEqual(new Span(3, 5, "person"), found[2]);
                Assert.False(HasOtherAsOutcome(trainedModel));
            }
        }
Example #3
0
        // Reads html1.train sample by sample and checks the tokens of every sentence, plus the
        // organization span on the two <li> lines, then expects the stream to be exhausted.
        public void TestHtmlNameSampleParsing()
        {
            using (var file = Tests.OpenFile("opennlp/tools/namefind/html1.train")) {
                var samples = new NameSampleStream(new PlainTextByLineStream(file));
                NameSample sample;

                // Opening tags: each is expected to come back as a single-token sentence.
                foreach (var tag in new[] { "<html>", "<head/>", "<body>", "<ul>" }) {
                    sample = samples.Read();
                    Assert.AreEqual(1, sample.Sentence.Length);
                    Assert.AreEqual(tag, sample.Sentence[0]);
                }

                // <li> <START:organization> Advanced Integrated Pest Management <END> </li>
                var firstItem = new[] { "<li>", "Advanced", "Integrated", "Pest", "Management", "</li>" };
                sample = samples.Read();
                Assert.AreEqual(firstItem.Length, sample.Sentence.Length);
                for (var i = 0; i < firstItem.Length; i++) {
                    Assert.AreEqual(firstItem[i], sample.Sentence[i]);
                }
                Assert.AreEqual(new Span(1, 5, organization), sample.Names[0]);

                // <li> <START:organization> Bay Cities Produce Co., Inc. <END> </li>
                var secondItem = new[] { "<li>", "Bay", "Cities", "Produce", "Co.,", "Inc.", "</li>" };
                sample = samples.Read();
                Assert.AreEqual(secondItem.Length, sample.Sentence.Length);
                for (var i = 0; i < secondItem.Length; i++) {
                    Assert.AreEqual(secondItem[i], sample.Sentence[i]);
                }
                Assert.AreEqual(new Span(1, 6, organization), sample.Names[0]);

                // Closing tags, again one token per sentence.
                foreach (var tag in new[] { "</ul>", "</body>", "</html>" }) {
                    sample = samples.Read();
                    Assert.AreEqual(1, sample.Sentence.Length);
                    Assert.AreEqual(tag, sample.Sentence[0]);
                }

                Assert.Null(samples.Read());
            }
        }
Example #4
0
        // Feeds the sample stream a line with a typed <START:...> tag nested inside another
        // one and reads it.
        // NOTE(review): nothing is asserted here; the sibling test for "<START> <START> Name <END>"
        // expects InvalidOperationException via Assert.Throws - confirm the same applies and assert it.
        public void TestWithNameTypeAndInvalidData2()
        {
            const string malformedLine = "<START:street> <START:person> Name <END> <END>";

            var lines = new GenericObjectStream<string>(malformedLine);
            var samples = new NameSampleStream(lines);

            samples.Read();
        }
Example #5
0
        // A second <START> tag before the first name is closed is invalid input; reading the
        // sample must fail. Previously the test only called Read() and asserted nothing.
        public void TestWithoutNameTypeAndInvalidData1()
        {
            // Fully qualified so the block does not depend on a `using System;` directive.
            Assert.Throws<System.InvalidOperationException>(() => {
                var sampleStream = new NameSampleStream(new GenericObjectStream<string>(
                                                            "<START> <START> Name <END>"));

                sampleStream.Read();
            });
        }
Example #6
0
        // Trains a name finder on voa1.train (several name types) and checks the typed spans
        // it finds in two pre-tokenized sentences.
        public void TestNameFinderWithMultipleTypes()
        {
            using (var trainingFile = Tests.OpenFile("opennlp/tools/namefind/voa1.train")) {
                var samples = new NameSampleStream(new PlainTextByLineStream(trainingFile));

                var trainingParams = new TrainingParameters();
                trainingParams.Set(Parameters.Iterations, "70");
                trainingParams.Set(Parameters.Cutoff, "1");

                var factory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
                var trainedModel = NameFinderME.Train("en", samples, trainingParams, factory);
                var finder = new NameFinderME(trainedModel);

                // First sentence: expects two locations, a person and another location.
                var tokens = new[] {
                    "U", ".", "S", ".", "President", "Barack", "Obama", "has",
                    "arrived", "in", "South", "Korea", ",", "where", "he", "is", "expected", "to",
                    "show", "solidarity", "with", "the", "country", "'", "s", "president", "in",
                    "demanding", "North", "Korea", "move", "toward", "ending", "its", "nuclear",
                    "weapons", "programs", "."
                };

                var found = finder.Find(tokens);

                // No separate type assertions: the expected spans carry the type, which the
                // original author notes is covered by the Span equality comparison.
                Assert.AreEqual(4, found.Length);
                Assert.AreEqual(new Span(0, 4, "location"), found[0]);
                Assert.AreEqual(new Span(5, 7, "person"), found[1]);
                Assert.AreEqual(new Span(10, 12, "location"), found[2]);
                Assert.AreEqual(new Span(28, 30, "location"), found[3]);

                // Second sentence: expects a person followed by an organization.
                tokens = new[] {
                    "Scott", "Snyder", "is", "the", "director", "of", "the",
                    "Center", "for", "U", ".", "S", ".", "Korea", "Policy", "."
                };

                found = finder.Find(tokens);

                Assert.AreEqual(2, found.Length);
                Assert.AreEqual(new Span(0, 2, "person"), found[0]);
                Assert.AreEqual(new Span(7, 15, "organization"), found[1]);
            }
        }
        // A second <START> tag before the first name is closed must make reading the sample
        // fail with InvalidOperationException. Stream construction stays inside the delegate,
        // as in the original, so a failure there is caught by the same assertion.
        public void TestWithoutNameTypeAndInvalidData1()
        {
            Assert.Throws<InvalidOperationException>(
                () => new NameSampleStream(
                    new GenericObjectStream<string>("<START> <START> Name <END>")).Read());
        }
Example #8
0
        // Trains a name finder on AnnotatedSentences.txt and checks the spans it finds in two
        // pre-tokenized sentences.
        public void TestNameFinder() {
            using (var trainingFile = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt")) {
                var lines = new PlainTextByLineStream(trainingFile, "ISO-8859-1");
                var samples = new NameSampleStream(lines);

                var trainingParams = new TrainingParameters();
                trainingParams.Set(Parameters.Iterations, "70");
                trainingParams.Set(Parameters.Cutoff, "1");

                var factory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
                var trainedModel = NameFinderME.Train("en", samples, trainingParams, factory);
                var finder = new NameFinderME(trainedModel);

                // First sentence: a single one-token name at the start.
                var tokens = new[] {
                    "Alisa", "appreciated", "the", "hint", "and",
                    "enjoyed", "a", "delicious", "traditional", "meal."
                };

                var found = finder.Find(tokens);

                Assert.AreEqual(1, found.Length);
                Assert.AreEqual(new Span(0, 1, Type), found[0]);

                // Second sentence: a one-token name and a two-token name.
                tokens = new[] { "Hi", "Mike", ",", "it's", "Stefanie", "Schmidt", "." };

                found = finder.Find(tokens);

                Assert.AreEqual(2, found.Length);
                Assert.AreEqual(new Span(1, 2, Type), found[0]);
                Assert.AreEqual(new Span(4, 6, Type), found[1]);
            }
        }
Example #9
0
        // Trains a name finder on AnnotatedSentences.txt, then verifies the spans detected in
        // two hand-tokenized sentences.
        public void TestNameFinder()
        {
            using (var corpus = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt")) {
                var corpusSamples = new NameSampleStream(new PlainTextByLineStream(corpus, "ISO-8859-1"));

                var settings = new TrainingParameters();
                settings.Set(Parameters.Iterations, "70");
                settings.Set(Parameters.Cutoff, "1");

                var finderFactory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
                var finder = new NameFinderME(NameFinderME.Train("en", corpusSamples, settings, finderFactory));

                // Sentence with one name in the first token.
                var firstSentence = new[] {
                    "Alisa", "appreciated", "the", "hint", "and",
                    "enjoyed", "a", "delicious", "traditional", "meal."
                };
                var firstResult = finder.Find(firstSentence);

                Assert.AreEqual(1, firstResult.Length);
                Assert.AreEqual(new Span(0, 1, Type), firstResult[0]);

                // Sentence with two names: tokens [1, 2) and [4, 6).
                var secondSentence = new[] { "Hi", "Mike", ",", "it's", "Stefanie", "Schmidt", "." };
                var secondResult = finder.Find(secondSentence);

                Assert.AreEqual(2, secondResult.Length);
                Assert.AreEqual(new Span(1, 2, Type), secondResult[0]);
                Assert.AreEqual(new Span(4, 6, Type), secondResult[1]);
            }
        }
Example #10
0
        // Reads every sample from AnnotatedSentences.txt, collects all name spans with their
        // covered text, and checks the number of names and the position of each span.
        public void TestWithoutNameTypes()
        {
            using (var file = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt")) {
                var samples = new NameSampleStream(new PlainTextByLineStream(file, "ISO-8859-1"));

                // Only the length of this list is asserted below.
                var expectedNames = new[] {
                    "Alan McKennedy", "Julie", "Marie Clara",
                    "Stefanie Schmidt", "Mike", "Stefanie Schmidt", "George", "Luise",
                    "George Bauer", "Alisa Fernandes", "Alisa", "Mike Sander",
                    "Stefan Miller", "Stefan Miller", "Stefan Miller", "Elenor Meier",
                    "Gina Schneider", "Bruno Schulz", "Michel Seile", "George Miller",
                    "Miller", "Peter Schubert", "Natalie"
                };

                // Expected { start, end } of each span, in the order the names are read.
                int[,] expectedBounds = {
                    { 6, 8 }, { 3, 4 }, { 1, 3 }, { 4, 6 }, { 1, 2 }, { 4, 6 }, { 2, 3 }, { 16, 17 },
                    { 18, 20 }, { 0, 2 }, { 0, 1 }, { 3, 5 }, { 3, 5 }, { 10, 12 }, { 1, 3 }, { 6, 8 },
                    { 6, 8 }, { 8, 10 }, { 12, 14 }, { 1, 3 }, { 0, 1 }, { 2, 4 }, { 5, 6 }
                };

                var names = new List<string>();
                var spans = new List<Span>();
                for (var sample = samples.Read(); sample != null; sample = samples.Read()) {
                    foreach (var name in sample.Names) {
                        names.Add(name.GetCoveredText(sample.Sentence));
                        spans.Add(name);
                    }
                }

                Assert.AreEqual(expectedNames.Length, names.Count);
                for (var i = 0; i < expectedBounds.GetLength(0); i++) {
                    Assert.AreEqual(createDefaultSpan(expectedBounds[i, 0], expectedBounds[i, 1]), spans[i]);
                }
            }
        }
        // Collects every name span (and its covered text) from AnnotatedSentences.txt, then
        // verifies the total count and the boundaries of each span in reading order.
        public void TestWithoutNameTypes() {
            using (var corpus = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt")) {
                var corpusSamples = new NameSampleStream(new PlainTextByLineStream(corpus, "ISO-8859-1"));

                // Only the length of this list takes part in the assertions.
                var expectedNames = new[] {
                    "Alan McKennedy", "Julie", "Marie Clara",
                    "Stefanie Schmidt", "Mike", "Stefanie Schmidt", "George", "Luise",
                    "George Bauer", "Alisa Fernandes", "Alisa", "Mike Sander",
                    "Stefan Miller", "Stefan Miller", "Stefan Miller", "Elenor Meier",
                    "Gina Schneider", "Bruno Schulz", "Michel Seile", "George Miller",
                    "Miller", "Peter Schubert", "Natalie"
                };

                var coveredTexts = new List<string>();
                var foundSpans = new List<Span>();

                var current = corpusSamples.Read();
                while (current != null) {
                    foreach (var nameSpan in current.Names) {
                        coveredTexts.Add(nameSpan.GetCoveredText(current.Sentence));
                        foundSpans.Add(nameSpan);
                    }
                    current = corpusSamples.Read();
                }

                Assert.AreEqual(expectedNames.Length, coveredTexts.Count);

                // One row per expected span: { start, end }.
                int[,] bounds = {
                    { 6, 8 }, { 3, 4 }, { 1, 3 }, { 4, 6 }, { 1, 2 }, { 4, 6 }, { 2, 3 }, { 16, 17 },
                    { 18, 20 }, { 0, 2 }, { 0, 1 }, { 3, 5 }, { 3, 5 }, { 10, 12 }, { 1, 3 }, { 6, 8 },
                    { 6, 8 }, { 8, 10 }, { 12, 14 }, { 1, 3 }, { 0, 1 }, { 2, 4 }, { 5, 6 }
                };
                for (var row = 0; row < bounds.GetLength(0); row++) {
                    Assert.AreEqual(createDefaultSpan(bounds[row, 0], bounds[row, 1]), foundSpans[row]);
                }
            }
        }
Example #12
0
        // Runs a 2-fold cross validation over AnnotatedSentences.txt with a
        // NameEvaluationErrorListener attached and checks that an F-measure is available.
        public void TestWithNameEvaluationErrorListener()
        {
            using (var corpus = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt")) {
                var lines = new PlainTextByLineStream(corpus, "ISO-8859-1");
                var samples = new NameSampleStream(lines);

                var settings = new TrainingParameters();
                settings.Set(Parameters.Iterations, "70");
                settings.Set(Parameters.Cutoff, "1");
                settings.Set(Parameters.Algorithm, Parameters.Algorithms.MaxEnt);

                var listener = new NameEvaluationErrorListener();
                var validator = new TokenNameFinderCrossValidator("en", Type, settings, listener);

                validator.Evaluate(samples, 2);

                Assert.NotNull(validator.FMeasure);
            }
        }
        // Cross-validates (2 folds) on AnnotatedSentences.txt using the MaxEnt algorithm with a
        // NameEvaluationErrorListener, then checks that FMeasure is not null.
        public void TestWithNameEvaluationErrorListener() {
            using (var trainingFile = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt")) {
                var trainingSamples = new NameSampleStream(
                    new PlainTextByLineStream(trainingFile, "ISO-8859-1"));

                var trainingParams = new TrainingParameters();
                trainingParams.Set(Parameters.Iterations, "70");
                trainingParams.Set(Parameters.Cutoff, "1");
                trainingParams.Set(Parameters.Algorithm, Parameters.Algorithms.MaxEnt);

                var crossValidator = new TokenNameFinderCrossValidator(
                    "en",
                    Type,
                    trainingParams,
                    new NameEvaluationErrorListener());

                crossValidator.Evaluate(trainingSamples, 2);

                Assert.NotNull(crossValidator.FMeasure);
            }
        }
Example #14
0
        // Reads four samples from in-memory text containing one empty line and checks the
        // ClearAdaptiveData flag of each: false for the first three, true for the fourth.
        public void TestClearAdaptiveData()
        {
            // Three one-token lines, an empty line, then a final one-token line.
            const string trainingData = "a\n" + "b\n" + "c\n" + "\n" + "d\n";

            var lineStream = new PlainTextByLineStream(new StringReader(trainingData));
            var samples = new NameSampleStream(lineStream);

            for (var i = 0; i < 3; i++) {
                Assert.False(samples.Read().ClearAdaptiveData);
            }

            Assert.True(samples.Read().ClearAdaptiveData);
            Assert.Null(samples.Read());
        }
        // A second <START> tag before the first name is closed is invalid input; reading the
        // sample must fail. Previously the test only called Read() and asserted nothing.
        public void TestWithoutNameTypeAndInvalidData1() {
            // Fully qualified so the block does not depend on a `using System;` directive.
            Assert.Throws<System.InvalidOperationException>(() => {
                var sampleStream = new NameSampleStream(new GenericObjectStream<string>(
                    "<START> <START> Name <END>"));

                sampleStream.Read();
            });
        }
Example #16
0
        // Trains on OnlyWithEntitiesWithTypes.train and verifies the first three typed spans
        // detected in a sentence that consists of entities only.
        public void TestOnlyWithEntitiesWithTypes() {
            using (var corpus = Tests.OpenFile("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")) {
                var corpusSamples = new NameSampleStream(new PlainTextByLineStream(corpus));

                var settings = new TrainingParameters();
                settings.Set(Parameters.Iterations, "70");
                settings.Set(Parameters.Cutoff, "1");

                var finderFactory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
                var model = NameFinderME.Train("en", corpusSamples, settings, finderFactory);
                var finder = new NameFinderME(model);

                // Tokens: NATO | United States | Barack Obama
                var tokens = WhitespaceTokenizer.Instance.Tokenize("NATO United States Barack Obama");
                var detected = finder.Find(tokens);

                Assert.AreEqual(new Span(0, 1, "organization"), detected[0]);
                Assert.AreEqual(new Span(1, 3, "location"), detected[1]);
                Assert.AreEqual(new Span(3, 5, "person"), detected[2]);
                Assert.False(HasOtherAsOutcome(model));
            }
        }
Example #17
0
        // Trains on voa1.train, which contains several name types, and verifies the typed
        // spans detected in two hand-tokenized sentences.
        public void TestNameFinderWithMultipleTypes() {
            using (var corpus = Tests.OpenFile("opennlp/tools/namefind/voa1.train")) {
                var corpusSamples = new NameSampleStream(new PlainTextByLineStream(corpus));

                var settings = new TrainingParameters();
                settings.Set(Parameters.Iterations, "70");
                settings.Set(Parameters.Cutoff, "1");

                var finderFactory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
                var finder = new NameFinderME(NameFinderME.Train("en", corpusSamples, settings, finderFactory));

                var firstSentence = new[] {
                    "U", ".", "S", ".", "President", "Barack", "Obama", "has",
                    "arrived", "in", "South", "Korea", ",", "where", "he", "is", "expected", "to",
                    "show", "solidarity", "with", "the", "country", "'", "s", "president", "in",
                    "demanding", "North", "Korea", "move", "toward", "ending", "its", "nuclear",
                    "weapons", "programs", "."
                };
                var firstResult = finder.Find(firstSentence);

                // Types are not asserted separately; per the original note, the Span equality
                // comparison already takes the type into account.
                Assert.AreEqual(4, firstResult.Length);
                Assert.AreEqual(new Span(0, 4, "location"), firstResult[0]);
                Assert.AreEqual(new Span(5, 7, "person"), firstResult[1]);
                Assert.AreEqual(new Span(10, 12, "location"), firstResult[2]);
                Assert.AreEqual(new Span(28, 30, "location"), firstResult[3]);

                var secondSentence = new[] {
                    "Scott", "Snyder", "is", "the", "director", "of", "the",
                    "Center", "for", "U", ".", "S", ".", "Korea", "Policy", "."
                };
                var secondResult = finder.Find(secondSentence);

                Assert.AreEqual(2, secondResult.Length);
                Assert.AreEqual(new Span(0, 2, "person"), secondResult[0]);
                Assert.AreEqual(new Span(7, 15, "organization"), secondResult[1]);
            }
        }
Example #18
0
        // Reads every sample from voa1.train, groups the annotated spans and their covered
        // text by span type, and compares count, span and text against the expected values
        // for the person, date, location and organization types.
        public void TestWithNameTypes()
        {
            using (var file = Tests.OpenFile("opennlp/tools/namefind/voa1.train")) {
                var samples = new NameSampleStream(new PlainTextByLineStream(file, "ISO-8859-1"));
                var names = new Dictionary<string, List<string>>();
                var spans = new Dictionary<string, List<Span>>();

                // Group covered text and spans by type, in reading order.
                for (var sample = samples.Read(); sample != null; sample = samples.Read()) {
                    foreach (var nameSpan in sample.Names) {
                        List<string> texts;
                        if (!names.TryGetValue(nameSpan.Type, out texts)) {
                            texts = new List<string>();
                            names.Add(nameSpan.Type, texts);
                            spans.Add(nameSpan.Type, new List<Span>());
                        }
                        texts.Add(nameSpan.GetCoveredText(sample.Sentence));
                        spans[nameSpan.Type].Add(nameSpan);
                    }
                }

                string[] expectedPerson = {
                    "Barack Obama", "Obama", "Obama",
                    "Lee Myung - bak", "Obama", "Obama", "Scott Snyder", "Snyder", "Obama",
                    "Obama", "Obama", "Tim Peters", "Obama", "Peters"
                };
                int[,] personBounds = {
                    { 5, 7 }, { 10, 11 }, { 29, 30 }, { 23, 27 }, { 1, 2 }, { 8, 9 }, { 0, 2 },
                    { 25, 26 }, { 1, 2 }, { 6, 7 }, { 14, 15 }, { 0, 2 }, { 12, 13 }, { 12, 13 }
                };

                string[] expectedDate = { "Wednesday", "Thursday", "Wednesday" };
                int[,] dateBounds = { { 7, 8 }, { 27, 28 }, { 15, 16 } };

                string[] expectedLocation = {
                    "U . S .", "South Korea", "North Korea",
                    "China", "South Korea", "North Korea", "North Korea", "U . S .",
                    "South Korea", "United States", "Pyongyang", "North Korea",
                    "South Korea", "Afghanistan", "Seoul", "U . S .", "China"
                };
                int[,] locationBounds = {
                    { 0, 4 }, { 10, 12 }, { 28, 30 }, { 3, 4 }, { 5, 7 }, { 16, 18 }, { 1, 3 },
                    { 5, 9 }, { 0, 2 }, { 4, 6 }, { 10, 11 }, { 6, 8 }, { 4, 6 }, { 10, 11 },
                    { 12, 13 }, { 5, 9 }, { 11, 12 }
                };

                string[] expectedOrganization = { "Center for U . S . Korea Policy" };
                int[,] organizationBounds = { { 7, 15 } };

                // Parallel tables: type key, expected texts, expected { start, end } rows.
                string[] types = { person, date, location, organization };
                string[][] expectedTexts = { expectedPerson, expectedDate, expectedLocation, expectedOrganization };
                int[][,] expectedBounds = { personBounds, dateBounds, locationBounds, organizationBounds };

                // All counts are checked first, before any individual span or text.
                for (var t = 0; t < types.Length; t++) {
                    Assert.AreEqual(expectedTexts[t].Length, names[types[t]].Count);
                }

                for (var t = 0; t < types.Length; t++) {
                    var type = types[t];
                    var bounds = expectedBounds[t];

                    for (var i = 0; i < expectedTexts[t].Length; i++) {
                        Assert.AreEqual(new Span(bounds[i, 0], bounds[i, 1], type), spans[type][i]);
                        Assert.AreEqual(expectedTexts[t][i], names[type][i]);
                    }
                }
            }
        }
        /// <summary>
        /// Feeds a single line containing a START tag nested inside another START tag
        /// to a <c>NameSampleStream</c> and reads one sample from it.
        /// </summary>
        /// <remarks>
        /// NOTE(review): nothing is asserted after the read, so this presumably relies on
        /// Read() throwing for the malformed line together with an expected-exception
        /// attribute that is not visible here — confirm.
        /// </remarks>
        public void TestWithNameTypeAndInvalidData2() {
            const string nestedTagsLine = "<START:street> <START:person> Name <END> <END>";

            var lines = new GenericObjectStream<string>(nestedTagsLine);
            var sampleStream = new NameSampleStream(lines);

            sampleStream.Read();
        }
        /// <summary>
        /// Reads every sample from <c>voa1.train</c>, groups the covered text and the span
        /// of each name by its type, and compares each group, entry by entry in stream
        /// order, with the expected person, date, location and organization values.
        /// </summary>
        public void TestWithNameTypes() {
            using (var file = Tests.OpenFile("opennlp/tools/namefind/voa1.train")) {
                var sampleStream = new NameSampleStream(new PlainTextByLineStream(file, "ISO-8859-1"));
                var names = new Dictionary<string, List<string>>();
                var spans = new Dictionary<string, List<Span>>();

                for (var sample = sampleStream.Read(); sample != null; sample = sampleStream.Read()) {
                    foreach (var found in sample.Names) {
                        List<string> texts;
                        if (!names.TryGetValue(found.Type, out texts)) {
                            // First occurrence of this type: both lists are created together
                            // so the two dictionaries always share the same keys.
                            texts = new List<string>();
                            names.Add(found.Type, texts);
                            spans.Add(found.Type, new List<Span>());
                        }
                        texts.Add(found.GetCoveredText(sample.Sentence));
                        spans[found.Type].Add(found);
                    }
                }

                // Expected covered text per type; row i pairs with row i of the
                // matching bounds table below.
                string[] expectedPerson = {
                    "Barack Obama", "Obama", "Obama",
                    "Lee Myung - bak", "Obama", "Obama", "Scott Snyder", "Snyder", "Obama",
                    "Obama", "Obama", "Tim Peters", "Obama", "Peters"
                };
                string[] expectedDate = {"Wednesday", "Thursday", "Wednesday"};
                string[] expectedLocation = {
                    "U . S .", "South Korea", "North Korea",
                    "China", "South Korea", "North Korea", "North Korea", "U . S .",
                    "South Korea", "United States", "Pyongyang", "North Korea",
                    "South Korea", "Afghanistan", "Seoul", "U . S .", "China"
                };
                string[] expectedOrganization = {"Center for U . S . Korea Policy"};

                // Expected {start, end} arguments for the Span of each entry above.
                int[,] personBounds = {
                    {5, 7}, {10, 11}, {29, 30}, {23, 27}, {1, 2}, {8, 9}, {0, 2},
                    {25, 26}, {1, 2}, {6, 7}, {14, 15}, {0, 2}, {12, 13}, {12, 13}
                };
                int[,] dateBounds = {
                    {7, 8}, {27, 28}, {15, 16}
                };
                int[,] locationBounds = {
                    {0, 4}, {10, 12}, {28, 30}, {3, 4}, {5, 7}, {16, 18}, {1, 3},
                    {5, 9}, {0, 2}, {4, 6}, {10, 11}, {6, 8}, {4, 6}, {10, 11},
                    {12, 13}, {5, 9}, {11, 12}
                };
                int[,] organizationBounds = {
                    {7, 15}
                };

                Assert.AreEqual(expectedPerson.Length, names[person].Count);
                Assert.AreEqual(expectedDate.Length, names[date].Count);
                Assert.AreEqual(expectedLocation.Length, names[location].Count);
                Assert.AreEqual(expectedOrganization.Length, names[organization].Count);

                for (var i = 0; i < expectedPerson.Length; i++) {
                    Assert.AreEqual(new Span(personBounds[i, 0], personBounds[i, 1], person), spans[person][i]);
                    Assert.AreEqual(expectedPerson[i], names[person][i]);
                }

                for (var i = 0; i < expectedDate.Length; i++) {
                    Assert.AreEqual(new Span(dateBounds[i, 0], dateBounds[i, 1], date), spans[date][i]);
                    Assert.AreEqual(expectedDate[i], names[date][i]);
                }

                for (var i = 0; i < expectedLocation.Length; i++) {
                    Assert.AreEqual(new Span(locationBounds[i, 0], locationBounds[i, 1], location), spans[location][i]);
                    Assert.AreEqual(expectedLocation[i], names[location][i]);
                }

                for (var i = 0; i < expectedOrganization.Length; i++) {
                    Assert.AreEqual(new Span(organizationBounds[i, 0], organizationBounds[i, 1], organization), spans[organization][i]);
                    Assert.AreEqual(expectedOrganization[i], names[organization][i]);
                }
            }
        }
        /// <summary>
        /// Builds a five-line training text (three lines, an empty line, one more line)
        /// and checks the <c>ClearAdaptiveData</c> flag of the samples read from it:
        /// false for the first three reads, true for the fourth, then a null read.
        /// </summary>
        public void TestClearAdaptiveData() {
            var trainingData = new StringBuilder()
                .Append("a\n")
                .Append("b\n")
                .Append("c\n")
                .Append("\n")
                .Append("d\n")
                .ToString();

            var trainingStream = new NameSampleStream(
                new PlainTextByLineStream(new StringReader(trainingData)));

            // The first three samples must not request clearing of adaptive data.
            for (var i = 0; i < 3; i++) {
                Assert.False(trainingStream.Read().ClearAdaptiveData);
            }

            // The fourth sample carries the flag (presumably the one after the empty line).
            Assert.True(trainingStream.Read().ClearAdaptiveData);

            // Nothing is left in the stream afterwards.
            Assert.Null(trainingStream.Read());
        }
        /// <summary>
        /// Reads <c>html1.train</c> sample by sample and checks the tokens of each
        /// sentence: four single-token opening tags, two list items that each contain
        /// one organization name, three single-token closing tags, then end of stream.
        /// </summary>
        public void TestHtmlNameSampleParsing() {
            string[] openingTags = {"<html>", "<head/>", "<body>", "<ul>"};
            string[] closingTags = {"</ul>", "</body>", "</html>"};

            // Expected tokens for the two list-item lines:
            // <li> <START:organization> Advanced Integrated Pest Management <END> </li>
            // <li> <START:organization> Bay Cities Produce Co., Inc. <END> </li>
            string[][] listItems = {
                new[] {"<li>", "Advanced", "Integrated", "Pest", "Management", "</li>"},
                new[] {"<li>", "Bay", "Cities", "Produce", "Co.,", "Inc.", "</li>"}
            };

            using (var file = Tests.OpenFile("opennlp/tools/namefind/html1.train")) {
                var ds = new NameSampleStream(new PlainTextByLineStream(file));

                foreach (var tag in openingTags) {
                    var sample = ds.Read();
                    Assert.AreEqual(1, sample.Sentence.Length);
                    Assert.AreEqual(tag, sample.Sentence[0]);
                }

                foreach (var tokens in listItems) {
                    var sample = ds.Read();
                    Assert.AreEqual(tokens.Length, sample.Sentence.Length);
                    for (var i = 0; i < tokens.Length; i++) {
                        Assert.AreEqual(tokens[i], sample.Sentence[i]);
                    }

                    // The name covers everything between the <li> and </li> tokens.
                    Assert.AreEqual(new Span(1, tokens.Length - 1, organization), sample.Names[0]);
                }

                foreach (var tag in closingTags) {
                    var sample = ds.Read();
                    Assert.AreEqual(1, sample.Sentence.Length);
                    Assert.AreEqual(tag, sample.Sentence[0]);
                }

                Assert.Null(ds.Read());
            }
        }
Example #23
0
        /// <summary>
        /// Trains a name finder on <c>OnlyWithNamesWithTypes.train</c> (70 iterations,
        /// cutoff 1), runs it over a whitespace-tokenized list of names, and checks that
        /// the first three results are two-token "person" spans and that
        /// <c>HasOtherAsOutcome</c> returns false for the trained model.
        /// </summary>
        public void TestOnlyWithNamesWithTypes() {
            using (var file = Tests.OpenFile("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")) {
                var trainingParams = new TrainingParameters();
                trainingParams.Set(Parameters.Iterations, "70");
                trainingParams.Set(Parameters.Cutoff, "1");

                var samples = new NameSampleStream(new PlainTextByLineStream(file));
                var factory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
                var model = NameFinderME.Train("en", samples, trainingParams, factory);

                var nameFinder = new NameFinderME(model);

                // now test if it can detect the sample sentences
                var tokens = WhitespaceTokenizer.Instance.Tokenize(
                    "Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman Robert Aderholt " +
                    "Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander");

                var names = nameFinder.Find(tokens);

                // Each of the first three names spans two consecutive tokens.
                int[] expectedStarts = {0, 2, 4};
                for (var i = 0; i < expectedStarts.Length; i++) {
                    Assert.AreEqual(new Span(expectedStarts[i], expectedStarts[i] + 2, "person"), names[i]);
                }

                Assert.False(HasOtherAsOutcome(model));
            }
        }