// Reads the html1.train resource sample by sample and checks the tokens of
// every sentence, plus the organization span of the two <li> entries, and
// finally that the stream is exhausted.
public void TestHtmlNameSampleParsing() {
    using (var file = Tests.OpenFile("opennlp/tools/namefind/html1.train")) {
        var samples = new NameSampleStream(new PlainTextByLineStream(file));
        NameSample current;

        // The opening markup lines each yield a one-token sentence.
        foreach (var tag in new[] { "<html>", "<head/>", "<body>", "<ul>" }) {
            current = samples.Read();
            Assert.AreEqual(1, current.Sentence.Length);
            Assert.AreEqual(tag, current.Sentence[0]);
        }

        // <li> <START:organization> Advanced Integrated Pest Management <END> </li>
        var firstItem = new[] { "<li>", "Advanced", "Integrated", "Pest", "Management", "</li>" };
        current = samples.Read();
        Assert.AreEqual(6, current.Sentence.Length);
        for (var i = 0; i < firstItem.Length; i++) {
            Assert.AreEqual(firstItem[i], current.Sentence[i]);
        }
        Assert.AreEqual(new Span(1, 5, organization), current.Names[0]);

        // <li> <START:organization> Bay Cities Produce Co., Inc. <END> </li>
        var secondItem = new[] { "<li>", "Bay", "Cities", "Produce", "Co.,", "Inc.", "</li>" };
        current = samples.Read();
        Assert.AreEqual(7, current.Sentence.Length);
        for (var i = 0; i < secondItem.Length; i++) {
            Assert.AreEqual(secondItem[i], current.Sentence[i]);
        }
        Assert.AreEqual(new Span(1, 6, organization), current.Names[0]);

        // The closing markup lines are again one-token sentences.
        foreach (var tag in new[] { "</ul>", "</body>", "</html>" }) {
            current = samples.Read();
            Assert.AreEqual(1, current.Sentence.Length);
            Assert.AreEqual(tag, current.Sentence[0]);
        }

        // Nothing is left to read after the last line.
        Assert.Null(samples.Read());
    }
}
/// <summary>
/// Builds a fixed sixteen-token sample sentence carrying three name spans
/// (tokens 0-4, 5-7 and 14-15).
/// </summary>
/// <param name="useTypes">
/// When <c>true</c> the spans keep their "Location"/"Person" types; when
/// <c>false</c> the spans are rebuilt from start and end only, without a type.
/// </param>
/// <returns>The newly constructed <c>NameSample</c>.</returns>
private static NameSample CreateSimpleNameSample(bool useTypes) {
    string[] tokens = {
        "U", ".", "S", ".", "President", "Barack", "Obama", "is", "considering",
        "sending", "additional", "American", "forces", "to", "Afghanistan", "."
    };

    var typedSpans = new[] {
        new Span(0, 4, "Location"),
        new Span(5, 7, "Person"),
        new Span(14, 15, "Location")
    };

    if (useTypes) {
        return new NameSample(tokens, typedSpans, false);
    }

    // Copy only the boundaries so the resulting spans carry no type.
    var untypedSpans = new Span[typedSpans.Length];
    var position = 0;
    foreach (var span in typedSpans) {
        untypedSpans[position++] = new Span(span.Start, span.End);
    }

    return new NameSample(tokens, untypedSpans, false);
}
// Checks the outcome sequence emitted by NameFinderEventStream for a
// twelve-token sentence whose first two tokens form a single "person" span:
// one start outcome, one continue outcome, ten "other" outcomes, then null.
public void TestOutcomesForSingleTypeSentence() {
    string[] tokens = {
        "Elise", "Wendel", "appreciated", "the", "hint", "and",
        "enjoyed", "a", "delicious", "traditional", "meal", "."
    };

    var personSpan = new Span(0, 2, "person");
    var sample = new NameSample(tokens, new[] { personSpan }, false);
    var events = new NameFinderEventStream(new CollectionObjectStream<NameSample>(sample));

    Assert.AreEqual("person-" + NameFinderME.START, events.Read().Outcome);
    Assert.AreEqual("person-" + NameFinderME.Continue, events.Read().Outcome);

    // The ten tokens after the name are all expected to be tagged "other".
    var remaining = 10;
    while (remaining-- > 0) {
        Assert.AreEqual(NameFinderME.Other, events.Read().Outcome);
    }

    Assert.Null(events.Read());
}
// Parses a line with three untyped name annotations and checks that the
// resulting sentence holds eight tokens.
// NOTE(review): the test name suggests the input should contain an extra
// space between tokens; the literal as seen here has single spaces only -
// confirm against the upstream test data.
public void TestParseWithAdditionalSpace() {
    const string annotated = "<START> M . K . <END> <START> Schwitters <END> ? <START> Heartfield <END> ?";

    var parsed = NameSample.Parse(annotated, false);
    var tokenCount = parsed.Sentence.Length;

    Assert.AreEqual(8, tokenCount);
}
// Parses a sentence whose three START tags carry type names containing
// punctuation and symbols, and checks that each type is returned verbatim.
public void TestTypeWithSpecialChars() {
    const string annotated =
        "<START:type-1> U . S . <END> " +
        "President <START:type_2> Barack Obama <END> is considering sending " +
        "additional American forces to <START:type_3-/;.,&%$> Afghanistan <END> .";

    var expectedTypes = new[] { "type-1", "type_2", "type_3-/;.,&%$" };

    var sample = NameSample.Parse(annotated, false);

    Assert.AreEqual(3, sample.Names.Length);
    for (var i = 0; i < expectedTypes.Length; i++) {
        Assert.AreEqual(expectedTypes[i], sample.Names[i].Type);
    }
}
// Checks the string form of a sample whose only name span covers the last
// token of the sentence.
public void TestNameAtEnd() {
    string[] tokens = { "My", "name", "is", "Anna" };
    var lastTokenSpan = new Span(3, 4);

    var nameSample = new NameSample(tokens, new[] { lastTokenSpan }, false);
    var rendered = nameSample.ToString();

    Assert.AreEqual("My name is <START> Anna <END>", rendered);
}
// First checks the string form of the untyped sample produced by
// CreateSimpleNameSample(false), then checks that parsing the typed
// annotation text yields a sample equal to CreateSimpleNameSample(true).
public void TestWithTypesToString() {
    const string expectedUntyped =
        "<START> U . S . <END> President <START> Barack Obama <END> is considering " +
        "sending additional American forces to <START> Afghanistan <END> .";

    const string typedInput =
        "<START:Location> U . S . <END> " +
        "President <START:Person> Barack Obama <END> is considering sending " +
        "additional American forces to <START:Location> Afghanistan <END> .";

    var untypedSample = CreateSimpleNameSample(false);
    Assert.AreEqual(expectedUntyped, untypedSample.ToString());

    var fromText = NameSample.Parse(typedInput, false);
    Assert.AreEqual(CreateSimpleNameSample(true), fromText);
}
// Command-line entry point: loads one name finder model per argument, then
// reads lines from standard input, tags each whitespace-tokenized line with
// every finder and writes the annotated sample to the console.
// With no arguments, only the help text is printed.
public override void run(string[] args) {
    if (args.Length == 0) {
        Console.WriteLine(Help);
        return;
    }

    // One finder per model file given on the command line.
    NameFinderME[] finders = new NameFinderME[args.Length];
    for (int idx = 0; idx < finders.Length; idx++) {
        TokenNameFinderModel loadedModel = (new TokenNameFinderModelLoader()).load(new File(args[idx]));
        finders[idx] = new NameFinderME(loadedModel);
    }

    ObjectStream<string> inputLines = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

    PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
    monitor.start();

    try {
        string text;
        while ((text = inputLines.read()) != null) {
            string[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(text);

            // A new line indicates a new document,
            // adaptive data must be cleared for a new document
            if (tokens.Length == 0) {
                foreach (NameFinderME finder in finders) {
                    finder.clearAdaptiveData();
                }
            }

            // Gather the spans reported by every finder for this line.
            IList<Span> found = new List<Span>();
            foreach (TokenNameFinder finder in finders) {
                Collections.addAll(found, finder.find(tokens));
            }

            // Simple way to drop intersecting spans, otherwise the
            // NameSample is invalid
            Span[] nonOverlapping = NameFinderME.dropOverlappingSpans(found.ToArray());

            NameSample sample = new NameSample(tokens, nonOverlapping, false);
            Console.WriteLine(sample.ToString());

            monitor.incrementCounter();
        }
    } catch (IOException ioError) {
        CmdLineUtil.handleStdinIoError(ioError);
    }

    monitor.stopAndPrintFinalResult();
}
// Feeds NameSample.Parse a line whose START tag has a stray '>' inside the
// type name.
// NOTE(review): no assertion is visible in this method; presumably the
// expected failure is declared by a test attribute outside this view - confirm.
public void TestTypeWithInvalidChar2() {
    const string malformed = "<START:abc>a> token <END>";

    NameSample.Parse(malformed, false);
}
// Feeds NameSample.Parse a line whose START tag type name contains a
// newline character.
// NOTE(review): no assertion is visible in this method; presumably the
// expected failure is declared by a test attribute outside this view - confirm.
public void TestTypeWithNewLine() {
    const string malformed = "<START:abc\na> token <END>";

    NameSample.Parse(malformed, false);
}
// Feeds NameSample.Parse a line whose START tag type name contains a space.
// NOTE(review): no assertion is visible in this method; presumably the
// expected failure is declared by a test attribute outside this view - confirm.
public void TestTypeWithSpace() {
    const string malformed = "<START:abc a> token <END>";

    NameSample.Parse(malformed, false);
}
// Feeds NameSample.Parse a line whose START tag has a colon but no type
// name after it.
// NOTE(review): no assertion is visible in this method; presumably the
// expected failure is declared by a test attribute outside this view - confirm.
public void TestMissingType() {
    const string malformed = "<START:> token <END>";

    NameSample.Parse(malformed, false);
}
// Asserts that parsing a line whose START tag has a stray '>' inside the
// type name throws InvalidOperationException.
public void TestTypeWithInvalidChar2() {
    const string malformed = "<START:abc>a> token <END>";

    Assert.Throws<InvalidOperationException>(() => NameSample.Parse(malformed, false));
}
// Asserts that parsing a line whose START tag has a colon but no type name
// throws InvalidOperationException.
public void TestMissingType() {
    const string malformed = "<START:> token <END>";

    Assert.Throws<InvalidOperationException>(() => NameSample.Parse(malformed, false));
}