Example #1
0
        /// <summary>
        /// Computes the chunk tags of a token sequence and returns them as an array.
        /// </summary>
        /// <param name="tokens">The tokens (words) of the sequence.</param>
        /// <param name="tags">The part-of-speech tags of the sequence.</param>
        /// <returns>
        /// The outcomes of the best sequence found by the model, or <c>null</c> when the model
        /// does not produce a sequence.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="tokens"/> is null, or <paramref name="tags"/> is null.
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">The token array is empty.</exception>
        public string[] Chunk(string[] tokens, string[] tags)
        {
            // Validation order matters: tokens (null, then empty) is checked before tags.
            if (tokens == null)
            {
                throw new ArgumentNullException("tokens");
            }

            if (tokens.Length == 0)
            {
                throw new ArgumentOutOfRangeException("tokens", "The token array is empty.");
            }

            if (tags == null)
            {
                throw new ArgumentNullException("tags");
            }

            // The tag array travels to the model as the single additional-context entry.
            var additionalContext = new object[] { tags };

            bestSequence = model.BestSequence(tokens, additionalContext, contextGenerator, sequenceValidator);

            if (bestSequence == null)
            {
                return null;
            }

            return bestSequence.Outcomes.ToArray();
        }
Example #2
0
        /// <summary>
        /// Lemmatizes a sequence of tokens using their part-of-speech tags.
        /// </summary>
        /// <param name="tokens">An array of the tokens.</param>
        /// <param name="tags">An array of the POS tags; must have the same length as <paramref name="tokens" />.</param>
        /// <returns>
        /// An array of lemma classes for each token in the sequence, or an empty array when the
        /// model does not produce a sequence.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="tokens" /> or <paramref name="tags" /> is null.</exception>
        /// <exception cref="ArgumentException">The arguments must have the same length.</exception>
        public string[] Lemmatize(string[] tokens, string[] tags)
        {
            if (tokens == null)
            {
                throw new ArgumentNullException(nameof(tokens));
            }

            if (tags == null)
            {
                throw new ArgumentNullException(nameof(tags));
            }

            if (tokens.Length != tags.Length)
            {
                throw new ArgumentException("The arguments must have the same length.");
            }

            // The tag array is handed to the model as the single additional-context entry.
            bestSequence = model.BestSequence(tokens, new object[] { tags }, contextGenerator, sequenceValidator);

            // Guard against the model producing no sequence (null) instead of failing
            // with a NullReferenceException on the dereference below.
            if (bestSequence == null)
            {
                return new string[0];
            }

            return bestSequence.Outcomes.ToArray();
        }
Example #3
0
        /// <summary>
        /// Generates name tags for the given sequence, typically a sentence, returning token spans for any identified names.
        /// </summary>
        /// <param name="tokens">An array of the tokens or words of the sequence, typically a sentence.</param>
        /// <param name="additionalContext">Features which are based on context outside of the sentence but which should also be used.</param>
        /// <returns>
        /// An array of spans for each of the names identified, or an empty array when the model
        /// does not produce a sequence.
        /// </returns>
        public Span[] Find(string[] tokens, string[][] additionalContext)
        {
            additionalContextFeatureGenerator.SetCurrentContext(additionalContext);

            // Each per-token context row is passed to the model as an opaque object.
            bestSequence = model.BestSequence(tokens,
                                              Array.ConvertAll(additionalContext, input => (object)input),
                                              contextGenerator,
                                              sequenceValidator);

            // Guard against the model producing no sequence (null): there are no outcomes to
            // decode, so report no names instead of failing with a NullReferenceException.
            if (bestSequence == null)
            {
                return new Span[0];
            }

            var outcomes = bestSequence.Outcomes.ToArray();

            contextGenerator.UpdateAdaptiveData(tokens, outcomes);

            var spans = sequenceCodec.Decode(outcomes);

            // Attach the probability computed for each decoded span.
            var probs = Probs(spans);

            for (var i = 0; i < probs.Length; i++)
            {
                spans[i].Probability = probs[i];
            }

            return spans;
        }
Example #4
0
 /// <summary>
 /// Tags the tokens of a sentence with part-of-speech tags.
 /// </summary>
 /// <param name="sentence">The tokens of the sentence to tag.</param>
 /// <param name="additionalContext">Additional context that is passed through to the sequence search.</param>
 /// <returns>
 /// The outcomes of the best sequence found for <paramref name="sentence"/>, or an empty array
 /// when no sequence is found.
 /// </returns>
 public string[] Tag(string[] sentence, object[] additionalContext) {
     bestSequence = model.BestSequence(sentence, additionalContext, ContextGenerator, SequenceValidator);

     if (bestSequence != null) {
         return bestSequence.Outcomes.ToArray();
     }

     // No sequence was produced: report no tags rather than null.
     return new string[0];
 }
Example #5
0
        /// <summary>
        /// Searches for the most probable outcome sequences for the given input sequence.
        /// </summary>
        /// <param name="numSequences">The maximum number of sequences to return.</param>
        /// <param name="sequence">The input sequence to be labelled.</param>
        /// <param name="additionalContext">Extra context handed to the context generator; <c>null</c> is replaced by an empty array.</param>
        /// <param name="minSequenceScore">Score threshold; a candidate is kept only when its score is greater than this value.</param>
        /// <param name="beamSearch">The context generator queried for every candidate at every position.</param>
        /// <param name="validator">The validator deciding whether an outcome may extend a candidate.</param>
        /// <returns>
        /// At most <paramref name="numSequences"/> sequences, taken from the heap of surviving
        /// candidates in the order the heap extracts them.
        /// </returns>
        public Sequence[] BestSequences(int numSequences, T[] sequence, object[] additionalContext,
                                        double minSequenceScore,
                                        IBeamSearchContextGenerator <T> beamSearch, ISequenceValidator <T> validator)
        {
            // Two heaps are swapped after every position: the candidates being extended
            // and the candidates produced by extending them.
            IHeap<Sequence> current  = new ListHeap<Sequence>(size);
            IHeap<Sequence> advanced = new ListHeap<Sequence>(size);

            // The search starts from a single empty sequence.
            current.Add(new Sequence());

            additionalContext = additionalContext ?? new object[0]; // EMPTY_ADDITIONAL_CONTEXT

            for (var position = 0; position < sequence.Length; position++)
            {
                // Extend at most "size" candidates per position.
                var limit = Math.Min(size, current.Size());
                var taken = 0;

                while (current.Size() > 0 && taken < limit)
                {
                    var parent   = current.Extract();
                    var history  = parent.Outcomes.ToArray();
                    var contexts = beamSearch.GetContext(position, sequence, history, additionalContext);

                    // Try the cache first (when one is configured), otherwise evaluate the
                    // model and remember the result for the next lookup.
                    double[] scores = null;
                    if (contextsCache != null)
                    {
                        scores = (double[])contextsCache.Get(contexts);
                    }

                    if (scores == null)
                    {
                        scores = model.Eval(contexts, probs);
                        if (contextsCache != null)
                        {
                            contextsCache.Put(contexts, scores);
                        }
                    }

                    // Sort a copy so the original index-to-outcome mapping stays intact.
                    var ranked = (double[])scores.Clone();
                    Array.Sort(ranked);

                    // Score of the "size"-th best outcome, or the lowest score when there
                    // are fewer than "size" outcomes.
                    var threshold = ranked[Math.Max(0, scores.Length - size)];

                    for (var index = 0; index < scores.Length; index++)
                    {
                        if (scores[index] < threshold)
                        {
                            continue; //only advance first "size" outcomes
                        }

                        var candidateOutcome = model.GetOutcome(index);
                        if (!validator.ValidSequence(position, sequence, history, candidateOutcome))
                        {
                            continue;
                        }

                        var extended = new Sequence(parent, candidateOutcome, scores[index]);
                        if (extended.Score > minSequenceScore)
                        {
                            advanced.Add(extended);
                        }
                    }

                    if (advanced.Size() == 0)
                    {
                        //if no advanced sequences, advance all valid
                        for (var index = 0; index < scores.Length; index++)
                        {
                            var candidateOutcome = model.GetOutcome(index);
                            if (!validator.ValidSequence(position, sequence, history, candidateOutcome))
                            {
                                continue;
                            }

                            var extended = new Sequence(parent, candidateOutcome, scores[index]);
                            if (extended.Score > minSequenceScore)
                            {
                                advanced.Add(extended);
                            }
                        }
                    }

                    taken++;
                }

                // The extended candidates become the current ones; the emptied heap is
                // reused to collect the next round.
                current.Clear();

                var recycled = current;
                current  = advanced;
                advanced = recycled;
            }

            var resultCount = Math.Min(numSequences, current.Size());
            var result      = new Sequence[resultCount];

            for (var slot = 0; slot < result.Length; slot++)
            {
                result[slot] = current.Extract();
            }

            return result;
        }
Example #6
0
        /// <summary>
        /// Generates chunk tags for the given sequence returning the result in an array.
        /// </summary>
        /// <param name="tokens">an array of the tokens or words of the sequence.</param>
        /// <param name="tags">an array of the pos tags of the sequence.</param>
        /// <returns>
        /// An array of chunk tags for each token in the sequence, or a <c>null</c> value when the
        /// model does not produce a sequence.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// The <paramref name="tokens"/> is null.
        /// or
        /// The <paramref name="tags"/> is null.
        /// </exception>
        public string[] Chunk(string[] tokens, string[] tags)
        {
            // Fail fast with a descriptive exception rather than a NullReferenceException
            // raised somewhere inside the sequence search.
            if (tokens == null)
            {
                throw new System.ArgumentNullException("tokens");
            }

            if (tags == null)
            {
                throw new System.ArgumentNullException("tags");
            }

            // The tag array is handed to the model as the single additional-context entry.
            bestSequence = model.BestSequence(tokens, new object[] { tags }, contextGenerator, sequenceValidator);

            // Guard against the model producing no sequence (null) before dereferencing it.
            if (bestSequence == null)
            {
                return null;
            }

            return bestSequence.Outcomes.ToArray();
        }
Example #7
0
        /// <summary>
        /// Generates name tags for the given sequence, typically a sentence, returning token spans for any identified names.
        /// </summary>
        /// <param name="tokens">An array of the tokens or words of the sequence, typically a sentence.</param>
        /// <param name="additionalContext">Features which are based on context outside of the sentence but which should also be used.</param>
        /// <returns>
        /// An array of spans for each of the names identified, or an empty array when the model
        /// does not produce a sequence.
        /// </returns>
        public Span[] Find(string[] tokens, string[][] additionalContext) {
            additionalContextFeatureGenerator.SetCurrentContext(additionalContext);

            // Each per-token context row is passed to the model as an opaque object.
            bestSequence = model.BestSequence(tokens,
                Array.ConvertAll(additionalContext, input => (object) input),
                contextGenerator,
                sequenceValidator);

            // Guard against the model producing no sequence (null): there are no outcomes to
            // decode, so report no names instead of failing with a NullReferenceException.
            if (bestSequence == null) {
                return new Span[0];
            }

            var outcomes = bestSequence.Outcomes.ToArray();

            contextGenerator.UpdateAdaptiveData(tokens, outcomes);

            var spans = sequenceCodec.Decode(outcomes);

            // Attach the probability computed for each decoded span.
            var probs = Probs(spans);
            for (var i = 0; i < probs.Length; i++) {
                spans[i].Probability = probs[i];
            }

            return spans;
        }
Example #8
0
 /// <summary>
 /// Assigns the sentence of tokens pos tags.
 /// </summary>
 /// <param name="sentence">The sentence of tokens to be tagged.</param>
 /// <param name="additionalContext">Any additional context specific to a class implementing this interface.</param>
 /// <returns>
 /// An array of pos tags for each token provided in sentence, or an empty array when the model
 /// does not produce a sequence.
 /// </returns>
 public string[] Tag(string[] sentence, object[] additionalContext)
 {
     bestSequence = model.BestSequence(sentence, additionalContext, ContextGenerator, SequenceValidator);

     // Guard against the model producing no sequence (null) instead of failing
     // with a NullReferenceException on the dereference below.
     if (bestSequence == null)
     {
         return new string[0];
     }

     return bestSequence.Outcomes.ToArray();
 }
Example #9
0
        /// <summary>
        /// Computes the chunk tags of a token sequence and returns them as an array.
        /// </summary>
        /// <param name="tokens">The tokens (words) of the sequence.</param>
        /// <param name="tags">The part-of-speech tags of the sequence.</param>
        /// <returns>
        /// The outcomes of the best sequence found by the model, or <c>null</c> when the model
        /// does not produce a sequence.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="tokens"/> is null, or <paramref name="tags"/> is null.
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">The token array is empty.</exception>
        public string[] Chunk(string[] tokens, string[] tags) {
            // Validation order matters: tokens (null, then empty) is checked before tags.
            if (tokens == null) {
                throw new ArgumentNullException("tokens");
            }

            if (tokens.Length == 0) {
                throw new ArgumentOutOfRangeException("tokens", "The token array is empty.");
            }

            if (tags == null) {
                throw new ArgumentNullException("tags");
            }

            // The tag array travels to the model as the single additional-context entry.
            var additionalContext = new object[] {tags};

            bestSequence = model.BestSequence(tokens, additionalContext, contextGenerator, sequenceValidator);

            if (bestSequence == null) {
                return null;
            }

            return bestSequence.Outcomes.ToArray();
        }