/// <summary>
/// Cross-validates the name finder: for every partition a model is trained on the
/// training portion, evaluated on the held-out portion, and the resulting F-measure
/// is merged into <c>FMeasure</c>.
/// </summary>
/// <param name="samples">The samples to train and test.</param>
/// <param name="partitions">The number of folds.</param>
public void Evaluate(IObjectStream<NameSample> samples, int partitions)
{
    // Note: The name samples need to be grouped on a document basis.
    var documentSamples = new NameToDocumentSampleStream(samples);
    var folds = new CrossValidationPartitioner<DocumentSample>(documentSamples, partitions);

    while (folds.HasNext)
    {
        var trainingDocuments = folds.Next();

        // Train on this fold's training portion, converted back to name samples.
        var trainingNames = new DocumentToNameSampleStream(trainingDocuments);
        var model = NameFinderME.Train(languageCode, type, trainingNames, parameters, factory);

        // Evaluate the freshly trained model on this fold's test portion.
        var foldEvaluator = new TokenNameFinderEvaluator(new NameFinderME(model), listeners);
        var testNames = new DocumentToNameSampleStream(trainingDocuments.GetTestSampleStream());
        foldEvaluator.Evaluate(testNames);

        FMeasure.MergeInto(foldEvaluator.FMeasure);
    }
}
/// <summary>
/// Trains a name finder on the annotated sentences resource and checks the spans
/// it finds in two sample sentences.
/// </summary>
public void TestNameFinder()
{
    using (var input = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt"))
    {
        var samples = new NameSampleStream(new PlainTextByLineStream(input, "ISO-8859-1"));

        var trainingParameters = new TrainingParameters();
        trainingParameters.Set(Parameters.Iterations, "70");
        trainingParameters.Set(Parameters.Cutoff, "1");

        var finderFactory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
        var model = NameFinderME.Train("en", samples, trainingParameters, finderFactory);

        var finder = new NameFinderME(model);

        // First sentence: a single name is expected at the first token.
        var firstSentence = new[]
        {
            "Alisa", "appreciated", "the", "hint", "and", "enjoyed", "a", "delicious", "traditional", "meal."
        };
        var firstNames = finder.Find(firstSentence);

        Assert.AreEqual(1, firstNames.Length);
        Assert.AreEqual(new Span(0, 1, Type), firstNames[0]);

        // Second sentence: two names are expected, the second spanning two tokens.
        var secondSentence = new[]
        {
            "Hi", "Mike", ",", "it's", "Stefanie", "Schmidt", "."
        };
        var secondNames = finder.Find(secondSentence);

        Assert.AreEqual(2, secondNames.Length);
        Assert.AreEqual(new Span(1, 2, Type), secondNames[0]);
        Assert.AreEqual(new Span(4, 6, Type), secondNames[1]);
    }
}
/// <summary>
/// Builds the events for the specified sequence by tagging its source sentence with a
/// name finder created from the given model and generating events from those tags.
/// </summary>
/// <param name="sequence">The sequence to be evaluated.</param>
/// <param name="model">The model.</param>
/// <returns>The event array.</returns>
public Event[] UpdateContext(Sequence sequence, AbstractModel model)
{
    // Wrap the raw model so that it can drive a name finder.
    var manifest = new Dictionary<string, object>();
    var finderModel = new TokenNameFinderModel("x-unspecified", model, manifest, null);
    var tagger = new NameFinderME(finderModel);

    var sentence = sequence.GetSource<NameSample>().Sentence;

    // Encode the spans found by the tagger as one outcome tag per token.
    var foundNames = tagger.Find(sentence);
    var tags = seqCodec.Encode(foundNames, sentence.Length);

    var events = NameFinderEventStream.GenerateEvents(sentence, tags, pcg);
    return events.ToArray();
}
/// <summary>
/// Determines whether a particular continuation of a sequence is valid.
/// This is used to restrict invalid sequences such as those used in start/continue tag-based chunking or could be used to implement tag dictionary restrictions.
/// </summary>
/// <remarks>
/// Only outcomes ending with <see cref="NameFinderME.Continue"/> are restricted. Such an outcome is rejected
/// when it is the first outcome of the sequence or when the previous outcome ends with
/// <see cref="NameFinderME.Other"/>. When the previous outcome is itself a "continue" outcome, both must
/// carry the same name type (or both carry none). Any other previous outcome is accepted without
/// comparing name types.
/// </remarks>
/// <param name="index">The index in the input sequence for which the new outcome is being proposed.</param>
/// <param name="inputSequence">The input sequence.</param>
/// <param name="outcomesSequence">The outcomes so far in this sequence.</param>
/// <param name="outcome">The next proposed outcome for the outcomes sequence.</param>
/// <returns><c>true</c> if the sequence would still be valid with the new outcome, <c>false</c> otherwise.</returns>
public bool ValidSequence(int index, string[] inputSequence, string[] outcomesSequence, string outcome)
{
    // outcome is formatted like "cont" or "sometype-cont", so we
    // can check if it ends with "cont".
    // The tags are identifiers rather than linguistic text, so they are compared ordinally;
    // the single-argument EndsWith overload would use the current culture instead.
    if (!outcome.EndsWith(NameFinderME.Continue, System.StringComparison.Ordinal))
    {
        return true;
    }

    var li = outcomesSequence.Length - 1;
    if (li == -1)
    {
        // Nothing precedes the proposed outcome, so there is no name to continue.
        return false;
    }

    var previousOutcome = outcomesSequence[li];

    if (previousOutcome.EndsWith(NameFinderME.Other, System.StringComparison.Ordinal))
    {
        // A name cannot be continued right after a token that is outside of any name.
        return false;
    }

    if (!previousOutcome.EndsWith(NameFinderME.Continue, System.StringComparison.Ordinal))
    {
        // The previous outcome is neither "other" nor "continue"; no type check is applied here.
        return true;
    }

    // Continue after continue: the two outcomes have to agree on the name type.
    var nameType = NameFinderME.ExtractNameType(outcome);
    var previousNameType = NameFinderME.ExtractNameType(previousOutcome);

    if (previousNameType == null && nameType == null)
    {
        return true;
    }

    return nameType != null && nameType.Equals(previousNameType);
}
/// <summary>
/// Creates the context generator. The feature generators returned by
/// <c>CreateFeatureGenerators()</c> are used; when that call yields <c>null</c>,
/// <c>NameFinderME.CreateFeatureGenerator()</c> supplies them instead.
/// </summary>
/// <returns>A new <c>DefaultNameContextGenerator</c> instance.</returns>
public virtual INameContextGenerator CreateContextGenerator()
{
    var featureGenerator = CreateFeatureGenerators() ?? NameFinderME.CreateFeatureGenerator();

    return new DefaultNameContextGenerator(featureGenerator);
}
/// <summary>
/// Produces the event array for the specified sequence: the source sentence is tagged
/// by a name finder built on the given model, the found spans are encoded as tags, and
/// the events are generated from the sentence and those tags.
/// </summary>
/// <param name="sequence">The sequence to be evaluated.</param>
/// <param name="model">The model.</param>
/// <returns>The event array.</returns>
public Event[] UpdateContext(Sequence sequence, AbstractModel model)
{
    // The raw model needs a name finder model wrapper before it can be used for tagging.
    var wrappedModel = new TokenNameFinderModel(
        "x-unspecified",
        model,
        new Dictionary<string, object>(),
        null);
    var nameFinder = new NameFinderME(wrappedModel);

    var tokens = sequence.GetSource<NameSample>().Sentence;

    var spans = nameFinder.Find(tokens);
    var outcomes = seqCodec.Encode(spans, tokens.Length);

    return NameFinderEventStream
        .GenerateEvents(tokens, outcomes, pcg)
        .ToArray();
}
/// <summary>
/// Trains a name finder on the multi-type "voa1" training resource and checks the typed
/// spans it finds in two sample sentences.
/// </summary>
public void TestNameFinderWithMultipleTypes()
{
    using (var input = Tests.OpenFile("opennlp/tools/namefind/voa1.train"))
    {
        var samples = new NameSampleStream(new PlainTextByLineStream(input));

        var trainingParameters = new TrainingParameters();
        trainingParameters.Set(Parameters.Iterations, "70");
        trainingParameters.Set(Parameters.Cutoff, "1");

        var finderFactory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
        var model = NameFinderME.Train("en", samples, trainingParameters, finderFactory);

        var finder = new NameFinderME(model);

        // NOTE: per the original porting note, the span type is not asserted separately
        // because Span equality is expected to cover it.

        // First sentence: three locations and one person are expected.
        var firstSentence = new[]
        {
            "U", ".", "S", ".", "President", "Barack", "Obama", "has",
            "arrived", "in", "South", "Korea", ",", "where", "he", "is",
            "expected", "to", "show", "solidarity", "with", "the", "country",
            "'", "s", "president", "in", "demanding", "North", "Korea", "move",
            "toward", "ending", "its", "nuclear", "weapons", "programs", "."
        };
        var firstNames = finder.Find(firstSentence);

        Assert.AreEqual(4, firstNames.Length);
        Assert.AreEqual(new Span(0, 4, "location"), firstNames[0]);
        Assert.AreEqual(new Span(5, 7, "person"), firstNames[1]);
        Assert.AreEqual(new Span(10, 12, "location"), firstNames[2]);
        Assert.AreEqual(new Span(28, 30, "location"), firstNames[3]);

        // Second sentence: one person and one organization are expected.
        var secondSentence = new[]
        {
            "Scott", "Snyder", "is", "the", "director", "of",
            "the", "Center", "for", "U", ".", "S", ".", "Korea",
            "Policy", "."
        };
        var secondNames = finder.Find(secondSentence);

        Assert.AreEqual(2, secondNames.Length);
        Assert.AreEqual(new Span(0, 2, "person"), secondNames[0]);
        Assert.AreEqual(new Span(7, 15, "organization"), secondNames[1]);
    }
}
/// <summary>
/// Trains a name finder on a resource that contains only typed entities, checks the
/// typed spans found in a whitespace-tokenized sample, and verifies that
/// <c>HasOtherAsOutcome</c> reports <c>false</c> for the trained model.
/// </summary>
public void TestOnlyWithEntitiesWithTypes()
{
    using (var input = Tests.OpenFile("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train"))
    {
        var samples = new NameSampleStream(new PlainTextByLineStream(input));

        var trainingParameters = new TrainingParameters();
        trainingParameters.Set(Parameters.Iterations, "70");
        trainingParameters.Set(Parameters.Cutoff, "1");

        var finderFactory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
        var model = NameFinderME.Train("en", samples, trainingParameters, finderFactory);

        var finder = new NameFinderME(model);

        // Every token of the sample belongs to one of three consecutive entities.
        var tokens = WhitespaceTokenizer.Instance.Tokenize("NATO United States Barack Obama");
        var found = finder.Find(tokens);

        Assert.AreEqual(new Span(0, 1, "organization"), found[0]);
        Assert.AreEqual(new Span(1, 3, "location"), found[1]);
        Assert.AreEqual(new Span(3, 5, "person"), found[2]);

        Assert.False(HasOtherAsOutcome(model));
    }
}
/// <summary>
/// Trains a name finder on a resource that contains only typed names, checks the first
/// three person spans found in a whitespace-tokenized list of names, and verifies that
/// <c>HasOtherAsOutcome</c> reports <c>false</c> for the trained model.
/// </summary>
public void TestOnlyWithNamesWithTypes()
{
    using (var input = Tests.OpenFile("opennlp/tools/namefind/OnlyWithNamesWithTypes.train"))
    {
        var samples = new NameSampleStream(new PlainTextByLineStream(input));

        var trainingParameters = new TrainingParameters();
        trainingParameters.Set(Parameters.Iterations, "70");
        trainingParameters.Set(Parameters.Cutoff, "1");

        var finderFactory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
        var model = NameFinderME.Train("en", samples, trainingParameters, finderFactory);

        var finder = new NameFinderME(model);

        // The sample consists solely of first-name/last-name pairs.
        var tokens = WhitespaceTokenizer.Instance.Tokenize(
            "Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman Robert Aderholt " +
            "Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander");
        var found = finder.Find(tokens);

        Assert.AreEqual(new Span(0, 2, "person"), found[0]);
        Assert.AreEqual(new Span(2, 4, "person"), found[1]);
        Assert.AreEqual(new Span(4, 6, "person"), found[2]);

        Assert.True(!HasOtherAsOutcome(model));
    }
}