Exemple #1
0
        /// <summary>
        /// Runs a cross validation over the given samples using the requested number of partitions.
        /// </summary>
        /// <param name="samples">The name samples used for both training and testing.</param>
        /// <param name="partitions">The number of folds.</param>
        public void Evaluate(IObjectStream<NameSample> samples, int partitions)
        {
            // The name samples are grouped on a document basis before they are partitioned.
            var documentSamples = new NameToDocumentSampleStream(samples);
            var folds = new CrossValidationPartitioner<DocumentSample>(documentSamples, partitions);

            while (folds.HasNext)
            {
                var fold = folds.Next();

                // Train a model from the training stream of the current fold.
                var trainingSamples = new DocumentToNameSampleStream(fold);
                var model = NameFinderME.Train(languageCode, type, trainingSamples, parameters, factory);

                // Evaluate that model against the test stream of the same fold.
                var nameFinder = new NameFinderME(model);
                var evaluator = new TokenNameFinderEvaluator(nameFinder, listeners);
                var testSamples = new DocumentToNameSampleStream(fold.GetTestSampleStream());

                evaluator.Evaluate(testSamples);

                // Merge the evaluator's F-measure into FMeasure.
                FMeasure.MergeInto(evaluator.FMeasure);
            }
        }
        /// <summary>
        /// Trains a name finder from the annotated sentences resource (70 iterations, cutoff 1)
        /// and checks the spans it finds in two sample sentences.
        /// </summary>
        public void TestNameFinder() {
            using (var input = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt")) {
                var lines = new PlainTextByLineStream(input, "ISO-8859-1");
                var samples = new NameSampleStream(lines);

                var trainingParams = new TrainingParameters();
                trainingParams.Set(Parameters.Iterations, "70");
                trainingParams.Set(Parameters.Cutoff, "1");

                var finderFactory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
                var model = NameFinderME.Train("en", samples, trainingParams, finderFactory);
                var finder = new NameFinderME(model);

                // First sentence: exactly one name, covering the first token.
                string[] firstSentence = {
                    "Alisa", "appreciated", "the", "hint", "and",
                    "enjoyed", "a", "delicious", "traditional", "meal."
                };
                var firstNames = finder.Find(firstSentence);

                Assert.AreEqual(1, firstNames.Length);
                Assert.AreEqual(new Span(0, 1, Type), firstNames[0]);

                // Second sentence: two names, at tokens [1, 2) and [4, 6).
                string[] secondSentence = { "Hi", "Mike", ",", "it's", "Stefanie", "Schmidt", "." };
                var secondNames = finder.Find(secondSentence);

                Assert.AreEqual(2, secondNames.Length);
                Assert.AreEqual(new Span(1, 2, Type), secondNames[0]);
                Assert.AreEqual(new Span(4, 6, Type), secondNames[1]);
            }
        }
Exemple #3
0
        /// <summary>
        /// Builds a new event array for the specified sequence from the outcomes predicted
        /// by a name finder that is backed by the specified model.
        /// </summary>
        /// <param name="sequence">The sequence to be evaluated.</param>
        /// <param name="model">The model.</param>
        /// <returns>The event array.</returns>
        public Event[] UpdateContext(Sequence sequence, AbstractModel model)
        {
            // Wrap the raw model so that it can drive a name finder.
            var finderModel = new TokenNameFinderModel(
                "x-unspecified",
                model,
                new Dictionary<string, object>(),
                null);
            var nameFinder = new NameFinderME(finderModel);

            var tokens = sequence.GetSource<NameSample>().Sentence;

            // Encode the spans found for the sentence as one outcome per token.
            var spans = nameFinder.Find(tokens);
            var outcomes = seqCodec.Encode(spans, tokens.Length);

            return NameFinderEventStream.GenerateEvents(tokens, outcomes, pcg).ToArray();
        }
        /// <summary>
        /// Determines whether a particular continuation of a sequence is valid.
        /// This is used to restrict invalid sequences such as those used in start/continue tag-based chunking or could be used to implement tag dictionary restrictions.
        /// </summary>
        /// <param name="index">The index in the input sequence for which the new outcome is being proposed.</param>
        /// <param name="inputSequence">The input sequence.</param>
        /// <param name="outcomesSequence">The outcomes so far in this sequence.</param>
        /// <param name="outcome">The next proposed outcome for the outcomes sequence.</param>
        /// <returns><c>true</c> if the sequence would still be valid with the new outcome, <c>false</c> otherwise.</returns>
        public bool ValidSequence(int index, string[] inputSequence, string[] outcomesSequence, string outcome)
        {
            // Outcome tags are identifiers rather than linguistic text, so every suffix
            // check below uses an ordinal comparison instead of the culture-sensitive default.

            // outcome is formatted like "cont" or "sometype-cont", so we
            // can check if it ends with "cont".
            if (!outcome.EndsWith(NameFinderME.Continue, System.StringComparison.Ordinal))
            {
                // Only continuation outcomes are restricted.
                return true;
            }

            var li = outcomesSequence.Length - 1;

            if (li == -1)
            {
                // A continuation cannot be the first outcome of a sequence.
                return false;
            }

            var previousOutcome = outcomesSequence[li];

            if (previousOutcome.EndsWith(NameFinderME.Other, System.StringComparison.Ordinal))
            {
                // A continuation cannot directly follow an "other" outcome.
                return false;
            }

            if (!previousOutcome.EndsWith(NameFinderME.Continue, System.StringComparison.Ordinal))
            {
                // The previous outcome is neither "other" nor a continuation;
                // the continuation is accepted without comparing the name types.
                return true;
            }

            // Continuation after continuation: both must carry the same name type.
            // The static Equals treats two null types as equal and a single null as a mismatch.
            var nameType = NameFinderME.ExtractNameType(outcome);
            var previousNameType = NameFinderME.ExtractNameType(previousOutcome);

            return string.Equals(nameType, previousNameType, System.StringComparison.Ordinal);
        }
 /// <summary>
 /// Creates the context generator. The feature generators come from
 /// <c>CreateFeatureGenerators</c>; when that returns <c>null</c>, the result of
 /// <c>NameFinderME.CreateFeatureGenerator</c> is used instead.
 /// </summary>
 /// <returns>A new <c>DefaultNameContextGenerator</c> built from the selected feature generators.</returns>
 public virtual INameContextGenerator CreateContextGenerator()
 {
     var featureGenerators = CreateFeatureGenerators() ?? NameFinderME.CreateFeatureGenerator();

     return new DefaultNameContextGenerator(featureGenerators);
 }
        /// <summary>
        /// Generates the events for the specified sequence, using the outcomes that a name finder
        /// created from the specified model predicts for the sequence's sentence.
        /// </summary>
        /// <param name="sequence">The sequence to be evaluated.</param>
        /// <param name="model">The model.</param>
        /// <returns>The event array.</returns>
        public Event[] UpdateContext(Sequence sequence, AbstractModel model) {
            var emptyDictionary = new Dictionary<string, object>();
            var nameFinder = new NameFinderME(
                new TokenNameFinderModel("x-unspecified", model, emptyDictionary, null));

            var tokens = sequence.GetSource<NameSample>().Sentence;
            var predictedSpans = nameFinder.Find(tokens);

            // The predicted spans are encoded into per-token outcomes before the events are generated.
            return NameFinderEventStream
                .GenerateEvents(tokens, seqCodec.Encode(predictedSpans, tokens.Length), pcg)
                .ToArray();
        }
        /// <summary>
        /// Trains a name finder from the <c>voa1.train</c> resource (70 iterations, cutoff 1)
        /// and checks the typed spans it finds in two sample sentences.
        /// </summary>
        public void TestNameFinderWithMultipleTypes() {
            using (var input = Tests.OpenFile("opennlp/tools/namefind/voa1.train")) {
                var samples = new NameSampleStream(new PlainTextByLineStream(input));

                var trainingParams = new TrainingParameters();
                trainingParams.Set(Parameters.Iterations, "70");
                trainingParams.Set(Parameters.Cutoff, "1");

                var finderFactory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
                var model = NameFinderME.Train("en", samples, trainingParams, finderFactory);
                var finder = new NameFinderME(model);

                // The expected spans carry the name type, so the Span comparison also
                // checks the type; no separate type assertions are made.

                // First sentence: three locations and one person.
                string[] firstSentence = {
                    "U", ".", "S", ".", "President", "Barack", "Obama", "has", "arrived", "in",
                    "South", "Korea", ",", "where", "he", "is", "expected", "to", "show", "solidarity",
                    "with", "the", "country", "'", "s", "president", "in", "demanding", "North", "Korea",
                    "move", "toward", "ending", "its", "nuclear", "weapons", "programs", "."
                };
                var firstNames = finder.Find(firstSentence);

                Assert.AreEqual(4, firstNames.Length);
                Assert.AreEqual(new Span(0, 4, "location"), firstNames[0]);
                Assert.AreEqual(new Span(5, 7, "person"), firstNames[1]);
                Assert.AreEqual(new Span(10, 12, "location"), firstNames[2]);
                Assert.AreEqual(new Span(28, 30, "location"), firstNames[3]);

                // Second sentence: one person and one organization.
                string[] secondSentence = {
                    "Scott", "Snyder", "is", "the", "director", "of", "the", "Center",
                    "for", "U", ".", "S", ".", "Korea", "Policy", "."
                };
                var secondNames = finder.Find(secondSentence);

                Assert.AreEqual(2, secondNames.Length);
                Assert.AreEqual(new Span(0, 2, "person"), secondNames[0]);
                Assert.AreEqual(new Span(7, 15, "organization"), secondNames[1]);
            }
        }
        /// <summary>
        /// Trains a name finder from the <c>OnlyWithEntitiesWithTypes.train</c> resource
        /// (70 iterations, cutoff 1), checks the first three typed spans found in a sample
        /// sentence, and checks that <c>HasOtherAsOutcome</c> reports <c>false</c> for the model.
        /// </summary>
        public void TestOnlyWithEntitiesWithTypes() {
            using (var input = Tests.OpenFile("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")) {
                var samples = new NameSampleStream(new PlainTextByLineStream(input));

                var trainingParams = new TrainingParameters();
                trainingParams.Set(Parameters.Iterations, "70");
                trainingParams.Set(Parameters.Cutoff, "1");

                var finderFactory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
                var model = NameFinderME.Train("en", samples, trainingParams, finderFactory);
                var finder = new NameFinderME(model);

                // Run the trained finder over a whitespace-tokenized sample sentence.
                var tokens = WhitespaceTokenizer.Instance.Tokenize("NATO United States Barack Obama");
                var found = finder.Find(tokens);

                Assert.AreEqual(new Span(0, 1, "organization"), found[0]);
                Assert.AreEqual(new Span(1, 3, "location"), found[1]);
                Assert.AreEqual(new Span(3, 5, "person"), found[2]);
                Assert.False(HasOtherAsOutcome(model));
            }
        }
        /// <summary>
        /// Trains a name finder from the <c>OnlyWithNamesWithTypes.train</c> resource
        /// (70 iterations, cutoff 1), checks the first three person spans found in a sample
        /// sentence, and checks that <c>HasOtherAsOutcome</c> reports <c>false</c> for the model.
        /// </summary>
        public void TestOnlyWithNamesWithTypes() {
            using (var file = Tests.OpenFile("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")) {
                var sampleStream = new NameSampleStream(new PlainTextByLineStream(file));

                var param = new TrainingParameters();
                param.Set(Parameters.Iterations, "70");
                param.Set(Parameters.Cutoff, "1");

                var model = NameFinderME.Train(
                    "en",
                    sampleStream,
                    param,
                    new TokenNameFinderFactory(null, new Dictionary<string, object>()));

                var nameFinder = new NameFinderME(model);

                // now test if it can detect the sample sentences
                var sentence = WhitespaceTokenizer.Instance.Tokenize(
                    "Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman Robert Aderholt " +
                    "Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander");

                var names = nameFinder.Find(sentence);

                // Only the first three of the found names are checked.
                Assert.AreEqual(new Span(0, 2, "person"), names[0]);
                Assert.AreEqual(new Span(2, 4, "person"), names[1]);
                Assert.AreEqual(new Span(4, 6, "person"), names[2]);

                // Assert.False states the expectation directly instead of negating inside Assert.True.
                Assert.False(HasOtherAsOutcome(model));
            }
        }