Exemple #1
0
        /// <summary>
        /// Reads <c>opennlp/tools/namefind/html1.train</c> one sample at a time and checks
        /// the tokens of every sample, plus the first name span of the two list items.
        /// </summary>
        public void TestHtmlNameSampleParsing()
        {
            using (var input = Tests.OpenFile("opennlp/tools/namefind/html1.train")) {
                var samples = new NameSampleStream(new PlainTextByLineStream(input));
                NameSample current;

                // Opening markup: each read yields a sample holding exactly one token.
                foreach (var tag in new[] { "<html>", "<head/>", "<body>", "<ul>" }) {
                    current = samples.Read();
                    Assert.AreEqual(1, current.Sentence.Length);
                    Assert.AreEqual(tag, current.Sentence[0]);
                }

                // <li> <START:organization> Advanced Integrated Pest Management <END> </li>
                var firstItem = new[] { "<li>", "Advanced", "Integrated", "Pest", "Management", "</li>" };
                current = samples.Read();
                Assert.AreEqual(6, current.Sentence.Length);
                for (var i = 0; i < firstItem.Length; i++) {
                    Assert.AreEqual(firstItem[i], current.Sentence[i]);
                }
                // `organization` is declared outside this method.
                Assert.AreEqual(new Span(1, 5, organization), current.Names[0]);

                // <li> <START:organization> Bay Cities Produce Co., Inc. <END> </li>
                var secondItem = new[] { "<li>", "Bay", "Cities", "Produce", "Co.,", "Inc.", "</li>" };
                current = samples.Read();
                Assert.AreEqual(7, current.Sentence.Length);
                for (var i = 0; i < secondItem.Length; i++) {
                    Assert.AreEqual(secondItem[i], current.Sentence[i]);
                }
                Assert.AreEqual(new Span(1, 6, organization), current.Names[0]);

                // Closing markup: again one token per sample.
                foreach (var tag in new[] { "</ul>", "</body>", "</html>" }) {
                    current = samples.Read();
                    Assert.AreEqual(1, current.Sentence.Length);
                    Assert.AreEqual(tag, current.Sentence[0]);
                }

                // The stream must be exhausted after the last line.
                Assert.Null(samples.Read());
            }
        }
Exemple #2
0
        /// <summary>
        /// Builds the fixed sixteen-token sample sentence with its three name spans.
        /// </summary>
        /// <param name="useTypes">
        /// When <c>true</c> the spans keep their "Location"/"Person" types; when
        /// <c>false</c> each span is rebuilt from its start and end only.
        /// </param>
        /// <returns>The constructed <c>NameSample</c>.</returns>
        private static NameSample CreateSimpleNameSample(bool useTypes)
        {
            var tokens = new[]
            {
                "U", ".", "S", ".", "President", "Barack", "Obama", "is",
                "considering", "sending", "additional", "American", "forces",
                "to", "Afghanistan", "."
            };

            var spans = new[]
            {
                new Span(0, 4, "Location"),
                new Span(5, 7, "Person"),
                new Span(14, 15, "Location")
            };

            if (!useTypes)
            {
                // Swap every typed span for one carrying the same bounds and no type.
                for (var index = 0; index < spans.Length; index++)
                {
                    var typed = spans[index];
                    spans[index] = new Span(typed.Start, typed.End);
                }
            }

            return new NameSample(tokens, spans, false);
        }
Exemple #3
0
        /// <summary>
        /// Checks the outcomes emitted by <c>NameFinderEventStream</c> for a twelve-token
        /// sentence whose first two tokens form a single "person" span.
        /// </summary>
        public void TestOutcomesForSingleTypeSentence()
        {
            var tokens = new[] {
                "Elise", "Wendel", "appreciated", "the", "hint", "and",
                "enjoyed", "a", "delicious", "traditional", "meal", "."
            };
            var personSpan = new Span(0, 2, "person");

            var sample = new NameSample(tokens, new[] { personSpan }, false);
            var events = new NameFinderEventStream(new CollectionObjectStream<NameSample>(sample));

            // The two tokens inside the span: start outcome, then continue outcome.
            Assert.AreEqual("person-" + NameFinderME.START, events.Read().Outcome);
            Assert.AreEqual("person-" + NameFinderME.Continue, events.Read().Outcome);

            // The ten remaining tokens lie outside any name.
            for (var remaining = 10; remaining > 0; remaining--)
            {
                Assert.AreEqual(NameFinderME.Other, events.Read().Outcome);
            }

            // One event per token, nothing more.
            Assert.Null(events.Read());
        }
Exemple #4
0
        /// <summary>
        /// Parses a line containing a doubled space between two tokens and checks that
        /// the resulting sentence has eight tokens.
        /// </summary>
        public void TestParseWithAdditionalSpace()
        {
            // Note the two consecutive spaces after the first "?".
            const string input = "<START> M . K . <END> <START> Schwitters <END> ?  <START> Heartfield <END> ?";

            var tokens = NameSample.Parse(input, false).Sentence;

            Assert.AreEqual(8, tokens.Length);
        }
Exemple #5
0
        /// <summary>
        /// Parses a line whose three start tags carry types with punctuation characters
        /// and checks that each type is read back unchanged.
        /// </summary>
        public void TestTypeWithSpecialChars()
        {
            var expectedTypes = new[] { "type-1", "type_2", "type_3-/;.,&%$" };

            var sample = NameSample.Parse(
                "<START:type-1> U . S . <END> "
                + "President <START:type_2> Barack Obama <END> is considering sending "
                + "additional American forces to <START:type_3-/;.,&%$> Afghanistan <END> .", false);

            Assert.AreEqual(3, sample.Names.Length);

            // Spans are compared in the order their tags appear in the line.
            for (var i = 0; i < expectedTypes.Length; i++)
            {
                Assert.AreEqual(expectedTypes[i], sample.Names[i].Type);
            }
        }
Exemple #6
0
        /// <summary>
        /// Checks the text form of a sample whose only name span covers the last token.
        /// </summary>
        public void TestNameAtEnd()
        {
            var tokens = new[] { "My", "name", "is", "Anna" };
            var lastToken = new Span(3, 4);

            var rendered = new NameSample(tokens, new[] { lastToken }, false).ToString();

            Assert.AreEqual("My name is <START> Anna <END>", rendered);
        }
Exemple #7
0
        /// <summary>
        /// Checks the text form of the simple name sample, both without and with name
        /// types, and that parsing the typed text yields a sample equal to the typed one.
        /// </summary>
        public void TestWithTypesToString()
        {
            const string untypedText = "<START> U . S . <END> President <START> Barack Obama <END> is considering " +
                                       "sending additional American forces to <START> Afghanistan <END> .";

            const string typedText = "<START:Location> U . S . <END> " +
                                     "President <START:Person> Barack Obama <END> is considering sending " +
                                     "additional American forces to <START:Location> Afghanistan <END> .";

            // Pre-existing check: the untyped sample renders with bare <START> tags.
            Assert.AreEqual(untypedText, CreateSimpleNameSample(false).ToString());

            // The test is named for the typed case, so the typed rendering is asserted
            // as well; previously only the untyped sample's ToString() was checked.
            Assert.AreEqual(typedText, CreateSimpleNameSample(true).ToString());

            // Parsing the typed text must give back a sample equal to the typed one.
            var parsedSample = NameSample.Parse(typedText, false);

            Assert.AreEqual(CreateSimpleNameSample(true), parsedSample);
        }
        /// <summary>
        /// Loads one name finder per model path in <paramref name="args"/>, then reads lines
        /// from standard input, tokenizes them on whitespace, runs every finder over the
        /// tokens and writes the resulting <c>NameSample</c> text to the console.
        /// </summary>
        /// <param name="args">Model file paths; when empty, only the help text is printed.</param>
        public override void run(string[] args)
        {
            // Without any model there is nothing to run: print usage and leave.
            if (args.Length == 0)
            {
                Console.WriteLine(Help);
                return;
            }

            // One finder per model path given on the command line.
            NameFinderME[] finders = new NameFinderME[args.Length];

            for (int index = 0; index < finders.Length; index++)
            {
                TokenNameFinderModel loaded = (new TokenNameFinderModelLoader()).load(new File(args[index]));
                finders[index] = new NameFinderME(loaded);
            }

            // NOTE(review): Console.OpenStandardInput is passed without being invoked —
            // confirm this matches what InputStreamReader's constructor expects.
            ObjectStream <string> input = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                for (string text = input.read(); text != null; text = input.read())
                {
                    string[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(text);

                    // A new line indicates a new document,
                    // adaptive data must be cleared for a new document
                    if (tokens.Length == 0)
                    {
                        foreach (NameFinderME finder in finders)
                        {
                            finder.clearAdaptiveData();
                        }
                    }

                    // Gather the spans reported by every finder for this line.
                    IList <Span> found = new List <Span>();

                    foreach (TokenNameFinder finder in finders)
                    {
                        Collections.addAll(found, finder.find(tokens));
                    }

                    // Simple way to drop intersecting spans, otherwise the
                    // NameSample is invalid
                    Span[] nonOverlapping = NameFinderME.dropOverlappingSpans(found.ToArray());

                    NameSample sample = new NameSample(tokens, nonOverlapping, false);

                    Console.WriteLine(sample.ToString());

                    // One counter tick per processed line.
                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
Exemple #9
0
 /// <summary>
 /// Parsing a start tag whose type is followed by a stray "a>" must be rejected
 /// with an <c>InvalidOperationException</c>.
 /// </summary>
 public void TestTypeWithInvalidChar2()
 {
     // Previously the call was made without asserting any outcome; the expected
     // failure is now stated explicitly. The exception type is fully qualified so
     // the method does not depend on a using directive.
     Assert.Throws<System.InvalidOperationException>(() => {
         NameSample.Parse("<START:abc>a> token <END>", false);
     });
 }
Exemple #10
0
 /// <summary>
 /// Feeds <c>NameSample.Parse</c> a start tag whose type contains a line break.
 /// </summary>
 public void TestTypeWithNewLine()
 {
     // NOTE(review): nothing is asserted here; presumably the parse is expected to
     // fail — confirm how the expected outcome is declared.
     const string line = "<START:abc\na> token <END>";

     NameSample.Parse(line, false);
 }
Exemple #11
0
 /// <summary>
 /// Feeds <c>NameSample.Parse</c> a start tag whose type contains a space.
 /// </summary>
 public void TestTypeWithSpace()
 {
     // NOTE(review): nothing is asserted here; presumably the parse is expected to
     // fail — confirm how the expected outcome is declared.
     const string line = "<START:abc a> token <END>";

     NameSample.Parse(line, false);
 }
Exemple #12
0
 /// <summary>
 /// Parsing a start tag with a colon but no type name must be rejected with an
 /// <c>InvalidOperationException</c>.
 /// </summary>
 public void TestMissingType()
 {
     // Previously the call was made without asserting any outcome; the expected
     // failure is now stated explicitly. The exception type is fully qualified so
     // the method does not depend on a using directive.
     Assert.Throws<System.InvalidOperationException>(() => {
         NameSample.Parse("<START:> token <END>", false);
     });
 }
 /// <summary>
 /// Parsing a start tag whose type is followed by a stray "a>" must throw an
 /// <c>InvalidOperationException</c>.
 /// </summary>
 public void TestTypeWithInvalidChar2()
 {
     const string malformed = "<START:abc>a> token <END>";

     Assert.Throws<InvalidOperationException>(() => NameSample.Parse(malformed, false));
 }
 /// <summary>
 /// Parsing a start tag with a colon but no type name must throw an
 /// <c>InvalidOperationException</c>.
 /// </summary>
 public void TestMissingType()
 {
     const string malformed = "<START:> token <END>";

     Assert.Throws<InvalidOperationException>(() => NameSample.Parse(malformed, false));
 }