/// <summary>
/// Computes a chunk tag for every token of the given sequence.
/// </summary>
/// <param name="tokens">The tokens (words) that make up the sequence.</param>
/// <param name="tags">The part-of-speech tags of the sequence.</param>
/// <returns>
/// The chunk tags of the best sequence as an array, or <c>null</c> when the model
/// does not produce a best sequence.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="tokens"/> is null.
/// or
/// <paramref name="tags"/> is null.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="tokens"/> is empty.</exception>
public string[] Chunk(string[] tokens, string[] tags)
{
    // Guard order matters: an empty token array is reported before a null tag array.
    if (tokens == null)
    {
        throw new ArgumentNullException("tokens");
    }

    if (tokens.Length == 0)
    {
        throw new ArgumentOutOfRangeException("tokens", "The token array is empty.");
    }

    if (tags == null)
    {
        throw new ArgumentNullException("tags");
    }

    // The POS tags travel to the context generator as the single additional-context entry.
    var additionalContext = new object[] { tags };
    bestSequence = model.BestSequence(tokens, additionalContext, contextGenerator, sequenceValidator);

    if (bestSequence == null)
    {
        return null;
    }

    return bestSequence.Outcomes.ToArray();
}
/// <summary>
/// Predicts the lemma class of each token, given the tokens and their part-of-speech tags.
/// </summary>
/// <param name="tokens">An array of the tokens.</param>
/// <param name="tags">An array of the POS tags, one per token.</param>
/// <returns>
/// An array with the outcomes of the best sequence, or an empty array when the model
/// does not produce a best sequence.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="tokens" /> or <paramref name="tags" /> is null.</exception>
/// <exception cref="ArgumentException">The arguments must have the same length.</exception>
public string[] Lemmatize(string[] tokens, string[] tags)
{
    if (tokens == null)
    {
        throw new ArgumentNullException(nameof(tokens));
    }

    if (tags == null)
    {
        throw new ArgumentNullException(nameof(tags));
    }

    if (tokens.Length != tags.Length)
    {
        throw new ArgumentException("The arguments must have the same length.");
    }

    bestSequence = model.BestSequence(tokens, new object[] { tags }, contextGenerator, sequenceValidator);

    // BestSequence can come back null (other callers in this file guard for it);
    // report "no result" as an empty array instead of failing with a NullReferenceException.
    if (bestSequence == null)
    {
        return new string[0];
    }

    return bestSequence.Outcomes.ToArray();
}
/// <summary>
/// Generates name tags for the given sequence, typically a sentence, and returns the token
/// spans of the names that were identified.
/// </summary>
/// <param name="tokens">An array of the tokens or words of the sequence, typically a sentence.</param>
/// <param name="additionalContext">Features which are based on context outside of the sentence but which should also be used.</param>
/// <returns>
/// An array of spans for each of the names identified, each carrying its probability;
/// an empty array when the model does not produce a best sequence.
/// </returns>
public Span[] Find(string[] tokens, string[][] additionalContext)
{
    additionalContextFeatureGenerator.SetCurrentContext(additionalContext);

    bestSequence = model.BestSequence(
        tokens,
        Array.ConvertAll(additionalContext, input => (object)input),
        contextGenerator,
        sequenceValidator);

    // BestSequence can come back null (other callers in this file guard for it).
    // Without a sequence there are no outcomes to decode, so report "no names found"
    // rather than failing with a NullReferenceException.
    if (bestSequence == null)
    {
        return new Span[0];
    }

    var outcomes = bestSequence.Outcomes.ToArray();

    // Let the context generator adapt to what was just predicted for this sentence.
    contextGenerator.UpdateAdaptiveData(tokens, outcomes);

    var spans = sequenceCodec.Decode(outcomes);
    var probs = Probs(spans);

    for (var i = 0; i < probs.Length; i++)
    {
        spans[i].Probability = probs[i];
    }

    return spans;
}
/// <summary>
/// Tags every token of the sentence with a part-of-speech tag.
/// </summary>
/// <param name="sentence">The tokens of the sentence to tag.</param>
/// <param name="additionalContext">Additional context that is forwarded unchanged to the sequence model.</param>
/// <returns>
/// The outcomes of the best sequence as an array, or an empty array when the model
/// does not produce a best sequence.
/// </returns>
public string[] Tag(string[] sentence, object[] additionalContext)
{
    bestSequence = model.BestSequence(sentence, additionalContext, ContextGenerator, SequenceValidator);

    if (bestSequence != null)
    {
        return bestSequence.Outcomes.ToArray();
    }

    // No sequence was produced: hand back an empty tag array rather than null.
    return new string[0];
}
/// <summary>
/// Runs a beam search over <paramref name="sequence"/> and returns up to
/// <paramref name="numSequences"/> of the sequences that remain on the beam.
/// </summary>
/// <param name="numSequences">The maximum number of sequences to return.</param>
/// <param name="sequence">The input sequence; one search step is performed per element.</param>
/// <param name="additionalContext">Extra context handed to <paramref name="beamSearch"/>; <c>null</c> is replaced by an empty array.</param>
/// <param name="minSequenceScore">A candidate is kept only when its score is strictly greater than this value.</param>
/// <param name="beamSearch">The context generator queried for each position and outcome history.</param>
/// <param name="validator">Decides whether an outcome may be appended at a given position.</param>
/// <returns>The sequences extracted from the final beam, in heap extraction order.</returns>
public Sequence[] BestSequences(int numSequences, T[] sequence, object[] additionalContext, double minSequenceScore, IBeamSearchContextGenerator<T> beamSearch, ISequenceValidator<T> validator)
{
    IHeap<Sequence> current = new ListHeap<Sequence>(size);
    IHeap<Sequence> upcoming = new ListHeap<Sequence>(size);

    // The search starts from a single empty sequence.
    current.Add(new Sequence());

    additionalContext = additionalContext ?? new object[0];

    for (var position = 0; position < sequence.Length; position++)
    {
        var beamWidth = Math.Min(size, current.Size());

        for (var taken = 0; current.Size() > 0 && taken < beamWidth; taken++)
        {
            var parent = current.Extract();
            var history = parent.Outcomes.ToArray();
            var contexts = beamSearch.GetContext(position, sequence, history, additionalContext);

            // Consult the cache first (when there is one); evaluate the model on a miss
            // and remember the result.
            double[] scores = null;
            if (contextsCache != null)
            {
                scores = (double[])contextsCache.Get(contexts);
            }

            if (scores == null)
            {
                scores = model.Eval(contexts, probs);
                if (contextsCache != null)
                {
                    contextsCache.Put(contexts, scores);
                }
            }

            // Sort a copy so the original index -> outcome mapping of "scores" stays intact.
            var ranked = (double[])scores.Clone();
            Array.Sort(ranked);
            var cutoff = ranked[Math.Max(0, scores.Length - size)];

            // Pass 0 advances only outcomes whose score is not below the cutoff (the first "size" outcomes).
            // Pass 1 runs only if nothing has been queued so far and then advances every valid outcome.
            for (var pass = 0; pass < 2; pass++)
            {
                var useCutoff = pass == 0;
                if (!useCutoff && upcoming.Size() != 0)
                {
                    break;
                }

                for (var p = 0; p < scores.Length; p++)
                {
                    // Phrased as "skip when below" so that NaN scores are treated exactly as before.
                    if (useCutoff && scores[p] < cutoff)
                    {
                        continue;
                    }

                    var outcome = model.GetOutcome(p);
                    if (!validator.ValidSequence(position, sequence, history, outcome))
                    {
                        continue;
                    }

                    var candidate = new Sequence(parent, outcome, scores[p]);
                    if (candidate.Score > minSequenceScore)
                    {
                        upcoming.Add(candidate);
                    }
                }
            }
        }

        // Swap the heaps: the queued candidates become the current beam and the
        // cleared heap is reused for the next position.
        current.Clear();
        var recycled = current;
        current = upcoming;
        upcoming = recycled;
    }

    var count = Math.Min(numSequences, current.Size());
    var result = new Sequence[count];
    for (var k = 0; k < count; k++)
    {
        result[k] = current.Extract();
    }

    return result;
}
/// <summary>
/// Generates chunk tags for the given sequence returning the result in an array.
/// </summary>
/// <param name="tokens">An array of the tokens or words of the sequence.</param>
/// <param name="tags">An array of the pos tags of the sequence.</param>
/// <returns>
/// An array of chunk tags for each token in the sequence, or <c>null</c> when the model
/// does not produce a best sequence.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="tokens"/> is null.
/// or
/// <paramref name="tags"/> is null.
/// </exception>
public string[] Chunk(string[] tokens, string[] tags)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException further down.
    if (tokens == null)
    {
        throw new ArgumentNullException(nameof(tokens));
    }

    if (tags == null)
    {
        throw new ArgumentNullException(nameof(tags));
    }

    bestSequence = model.BestSequence(tokens, new object[] { tags }, contextGenerator, sequenceValidator);

    // BestSequence can come back null (other callers in this file guard for it);
    // surface that as a null result rather than dereferencing it.
    if (bestSequence == null)
    {
        return null;
    }

    return bestSequence.Outcomes.ToArray();
}
/// <summary>
/// Generates name tags for the given sequence, typically a sentence, returning token spans
/// for any identified names.
/// </summary>
/// <param name="tokens">An array of the tokens or words of the sequence, typically a sentence.</param>
/// <param name="additionalContext">Features which are based on context outside of the sentence but which should also be used.</param>
/// <returns>
/// An array of spans for each of the names identified, with the probability of each span set;
/// an empty array when the model does not produce a best sequence.
/// </returns>
public Span[] Find(string[] tokens, string[][] additionalContext)
{
    additionalContextFeatureGenerator.SetCurrentContext(additionalContext);

    // The sequence model takes object[] as additional context, so each row is boxed as-is.
    var context = Array.ConvertAll(additionalContext, input => (object)input);
    bestSequence = model.BestSequence(tokens, context, contextGenerator, sequenceValidator);

    // BestSequence can come back null (other callers in this file guard for it).
    // Nothing can be decoded in that case, so return no spans instead of throwing
    // a NullReferenceException.
    if (bestSequence == null)
    {
        return new Span[0];
    }

    var outcomes = bestSequence.Outcomes.ToArray();
    contextGenerator.UpdateAdaptiveData(tokens, outcomes);

    var spans = sequenceCodec.Decode(outcomes);

    // Attach the probability computed for each decoded span.
    var probs = Probs(spans);
    for (var i = 0; i < probs.Length; i++)
    {
        spans[i].Probability = probs[i];
    }

    return spans;
}
/// <summary>
/// Assigns the sentence of tokens pos tags.
/// </summary>
/// <param name="sentence">The sentence of tokens to be tagged.</param>
/// <param name="additionalContext">Any additional context specific to a class implementing this interface.</param>
/// <returns>
/// An array of pos tags for each token provided in sentence, or an empty array when the
/// model does not produce a best sequence.
/// </returns>
public string[] Tag(string[] sentence, object[] additionalContext)
{
    bestSequence = model.BestSequence(sentence, additionalContext, ContextGenerator, SequenceValidator);

    // BestSequence can come back null (other callers in this file guard for it);
    // return an empty tag array instead of failing with a NullReferenceException.
    if (bestSequence == null)
    {
        return new string[0];
    }

    return bestSequence.Outcomes.ToArray();
}
/// <summary>
/// Produces the chunk tags for a sequence of tokens and their part-of-speech tags.
/// </summary>
/// <param name="tokens">The tokens or words of the sequence.</param>
/// <param name="tags">The pos tags of the sequence.</param>
/// <returns>
/// An array holding the outcomes of the best sequence, or a <c>null</c> value when the
/// model does not produce a best sequence.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="tokens"/> is null.
/// or
/// The <paramref name="tags"/> is null.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">The token array is empty.</exception>
public string[] Chunk(string[] tokens, string[] tags)
{
    // Validation order is significant: tokens (null, then empty) is checked before tags.
    if (tokens == null)
    {
        throw new ArgumentNullException("tokens");
    }

    if (tokens.Length == 0)
    {
        throw new ArgumentOutOfRangeException("tokens", "The token array is empty.");
    }

    if (tags == null)
    {
        throw new ArgumentNullException("tags");
    }

    bestSequence = model.BestSequence(
        tokens,
        new object[] { tags },
        contextGenerator,
        sequenceValidator);

    // Null-conditional access yields null when no best sequence was produced.
    return bestSequence?.Outcomes.ToArray();
}