/// <summary>
/// Copy constructor: builds a sequence holding the same outcomes, probabilities
/// and score as <paramref name="sequenceToCopy"/>.
/// </summary>
/// <param name="sequenceToCopy">
/// The sequence whose contents are duplicated.
/// </param>
public Sequence(Sequence sequenceToCopy)
{
    // Both lists are sized one larger than the source; presumably this leaves
    // room for a single appended element without a reallocation.
    List<string> copiedOutcomes = new List<string>(sequenceToCopy.Outcomes.Count + 1);
    copiedOutcomes.AddRange(sequenceToCopy.Outcomes);

    List<double> copiedProbabilities = new List<double>(sequenceToCopy.Probabilities.Count + 1);
    copiedProbabilities.AddRange(sequenceToCopy.Probabilities);

    _outcomes = copiedOutcomes;
    _probabilities = copiedProbabilities;
    _score = sequenceToCopy.Score;
}
/// <summary>
/// Decides whether a proposed outcome may be appended to a partially built
/// outcome sequence. Subclasses can override this to reject invalid
/// continuations, such as those arising in start/continue tag-based chunking,
/// or to enforce tag dictionary restrictions.
/// </summary>
/// <param name="index">
/// Position in the input sequence for which the outcome is proposed.
/// </param>
/// <param name="inputSequence">
/// The input sequence.
/// </param>
/// <param name="outcomesSequence">
/// The outcomes assigned so far.
/// </param>
/// <param name="outcome">
/// The outcome proposed as the next element of the outcomes sequence.
/// </param>
/// <returns>
/// true if the sequence remains valid with the new outcome, false otherwise.
/// This base implementation accepts every continuation.
/// </returns>
protected internal virtual bool ValidSequence(int index, ArrayList inputSequence, Sequence outcomesSequence, string outcome)
{
    // No restriction at this level: every proposed outcome is allowed.
    return true;
}
/// <summary>
/// Performs a beam search over the input and returns the top ranked outcome
/// sequences according to the model of this object.
/// </summary>
/// <param name="numSequences">
/// Upper bound on the number of sequences returned.
/// </param>
/// <param name="sequence">
/// The input sequence.
/// </param>
/// <param name="additionalContext">
/// Extra context objects, forwarded unchanged to the context generator. When null,
/// the shared empty context (mEmptyAdditionalContext) is used instead.
/// </param>
/// <param name="minSequenceScore">
/// Lower bound on sequence scores; only sequences scoring strictly above it are kept.
/// </param>
/// <returns>
/// An array of at most <paramref name="numSequences"/> sequences, in the order in
/// which they are extracted from the final heap.
/// </returns>
public virtual Sequence[] BestSequences(int numSequences, object[] sequence, object[] additionalContext, double minSequenceScore)
{
    int inputLength = sequence.Length;
    ListHeap<Sequence> currentBeam = new ListHeap<Sequence>(Size);
    ListHeap<Sequence> upcomingBeam = new ListHeap<Sequence>(Size);

    // The search starts from a single empty sequence.
    currentBeam.Add(new Sequence());
    additionalContext = additionalContext ?? mEmptyAdditionalContext;

    for (int position = 0; position < inputLength; position++)
    {
        // Expand at most Size sequences from the current beam.
        int expansionLimit = System.Math.Min(Size, currentBeam.Size);
        int expanded = 0;
        while (currentBeam.Size > 0 && expanded < expansionLimit)
        {
            Sequence parent = currentBeam.Extract();
            string[] priorOutcomes = parent.Outcomes.ToArray();
            string[] contexts = ContextGenerator.GetContext(position, sequence, priorOutcomes, additionalContext);

            // NOTE(review): the cache is keyed by the contexts array itself; whether
            // lookups match by content depends on the cache implementation - confirm.
            double[] scores;
            if (mContextsCache == null)
            {
                scores = Model.Evaluate(contexts, mProbabilities);
            }
            else
            {
                scores = (double[]) mContextsCache[contexts];
                if (scores == null)
                {
                    scores = Model.Evaluate(contexts, mProbabilities);
                    mContextsCache[contexts] = scores;
                }
            }

            // Sort a copy of the scores to find the value of the Size-th best one;
            // anything strictly below that cutoff is skipped in the first pass.
            double[] rankedScores = (double[]) scores.Clone();
            Array.Sort(rankedScores);
            double cutoff = rankedScores[System.Math.Max(0, scores.Length - Size)];

            // NOTE(review): ValidSequence is invoked with the raw input array and the
            // outcome array - confirm an overload with that shape exists in this class.
            for (int outcomeIndex = 0; outcomeIndex < scores.Length; outcomeIndex++)
            {
                if (scores[outcomeIndex] < cutoff)
                {
                    continue; // only advance the first "Size" outcomes
                }

                string candidate = Model.GetOutcomeName(outcomeIndex);
                if (!ValidSequence(position, sequence, priorOutcomes, candidate))
                {
                    continue;
                }

                Sequence extended = new Sequence(parent, candidate, scores[outcomeIndex]);
                if (extended.Score > minSequenceScore)
                {
                    upcomingBeam.Add(extended);
                }
            }

            // Nothing has been advanced so far: retry with every valid outcome,
            // this time ignoring the cutoff.
            if (upcomingBeam.Size == 0)
            {
                for (int outcomeIndex = 0; outcomeIndex < scores.Length; outcomeIndex++)
                {
                    string candidate = Model.GetOutcomeName(outcomeIndex);
                    if (!ValidSequence(position, sequence, priorOutcomes, candidate))
                    {
                        continue;
                    }

                    Sequence extended = new Sequence(parent, candidate, scores[outcomeIndex]);
                    if (extended.Score > minSequenceScore)
                    {
                        upcomingBeam.Add(extended);
                    }
                }
            }

            expanded++;
        }

        // The freshly built beam becomes the current one; the old heap is emptied
        // and reused as the target for the next position.
        currentBeam.Clear();
        ListHeap<Sequence> recycled = currentBeam;
        currentBeam = upcomingBeam;
        upcomingBeam = recycled;
    }

    int resultCount = System.Math.Min(numSequences, currentBeam.Size);
    Sequence[] best = new Sequence[resultCount];
    for (int rank = 0; rank < resultCount; rank++)
    {
        best[rank] = currentBeam.Extract();
    }
    return best;
}
/// <summary>
/// Adapter overload: unpacks the outcome list from <paramref name="outcomesSequence"/>
/// and the first additional-context element, then forwards to the GetContext
/// overload that accepts them directly.
/// </summary>
/// <param name="index">
/// Forwarded unchanged to the target overload.
/// </param>
/// <param name="sequence">
/// Forwarded unchanged to the target overload.
/// </param>
/// <param name="outcomesSequence">
/// The sequence whose Outcomes are forwarded.
/// </param>
/// <param name="additionalContext">
/// Array whose first element is cast to IDictionary&lt;string, string&gt; and forwarded;
/// it must therefore contain at least one element.
/// </param>
/// <returns>
/// Whatever the target GetContext overload returns.
/// </returns>
public virtual string[] GetContext(int index, List<string> sequence, Sequence outcomesSequence, object[] additionalContext)
{
    // Read the outcomes before touching additionalContext, matching the
    // left-to-right argument evaluation of a single inline call.
    var priorOutcomes = outcomesSequence.Outcomes;
    IDictionary<string, string> firstContext = (IDictionary<string, string>) additionalContext[0];

    return GetContext(index, sequence, priorOutcomes, firstContext);
}
/// <summary>
/// Validates a proposed outcome by delegating to the name finder's ValidOutcome.
/// Only <paramref name="outcome"/> and <paramref name="outcomeSequence"/> take part
/// in the decision; <paramref name="index"/> and <paramref name="sequence"/> are unused.
/// </summary>
/// <param name="index">
/// Position in the input sequence (not consulted here).
/// </param>
/// <param name="sequence">
/// The input sequence (not consulted here).
/// </param>
/// <param name="outcomeSequence">
/// The outcomes assigned so far.
/// </param>
/// <param name="outcome">
/// The proposed next outcome.
/// </param>
/// <returns>
/// The result of mNameFinder.ValidOutcome for the given outcome and outcome sequence.
/// </returns>
protected internal override bool ValidSequence(int index, ArrayList sequence, Sequence outcomeSequence, string outcome)
{
    bool accepted = mNameFinder.ValidOutcome(outcome, outcomeSequence);
    return accepted;
}
/// <summary>
/// Determines whether the outcome is valid for the preceding sequence.
/// This can be used to implement constraints on which sequences are valid.
/// </summary>
/// <param name="outcome">
/// The outcome.
/// </param>
/// <param name="sequence">
/// The preceding sequence of outcome assignments.
/// </param>
/// <returns>
/// true if the outcome is valid for the sequence, false otherwise. A Continue
/// outcome is rejected when nothing precedes it or when the preceding outcome
/// is Other; every other outcome is accepted.
/// </returns>
protected internal virtual bool ValidOutcome(string outcome, Sequence sequence)
{
    // Only a continuation outcome is constrained by what came before.
    if (outcome != Continue)
    {
        return true;
    }

    // Look at the last outcome in place rather than copying the whole list
    // into an array on every call.
    int precedingCount = sequence.Outcomes.Count;
    if (precedingCount == 0)
    {
        // Nothing to continue.
        return false;
    }

    return sequence.Outcomes[precedingCount - 1] != Other;
}
/// <summary>
/// Runs the beam search over the given tokens, remembers the winning sequence
/// in mBestSequence, and returns its outcomes.
/// </summary>
/// <param name="tokens">
/// The tokens passed to the beam search as its input sequence.
/// </param>
/// <param name="previousTags">
/// Passed to the beam search as the single element of its additional context array.
/// </param>
/// <returns>
/// A new array containing the outcomes of the best sequence.
/// </returns>
public virtual string[] Find(object[] tokens, IDictionary previousTags)
{
    mBestSequence = mBeam.BestSequence(tokens, new object[] { previousTags });

    // Copy the outcomes straight into a string[]; going through an
    // intermediate non-generic ArrayList only adds an extra copy and a cast.
    return mBestSequence.Outcomes.ToArray();
}
/// <summary>
/// Runs the beam search over the given tokens, remembers the winning sequence
/// in mBestSequence, and returns its outcomes as a new list.
/// </summary>
/// <param name="tokens">
/// The tokens passed to the beam search as its input sequence.
/// </param>
/// <param name="previousTags">
/// Passed to the beam search as the single element of its additional context array.
/// </param>
/// <returns>
/// A new ArrayList populated from the outcomes of the best sequence.
/// </returns>
public virtual ArrayList Find(ArrayList tokens, IDictionary previousTags)
{
    object[] additionalContext = new object[] { previousTags };
    mBestSequence = mBeam.BestSequence(tokens, additionalContext);

    ArrayList outcomes = new ArrayList(mBestSequence.Outcomes);
    return outcomes;
}
/// <summary>
/// Builds a sequence that extends an existing one by a single outcome.
/// </summary>
/// <param name="sequenceToCopy">
/// The sequence whose outcomes, probabilities and score form the starting point.
/// </param>
/// <param name="outcome">
/// The outcome appended after the copied outcomes.
/// </param>
/// <param name="probability">
/// The probability appended after the copied probabilities; its natural
/// logarithm is added to the copied score.
/// </param>
public Sequence(Sequence sequenceToCopy, string outcome, double probability)
{
    // Size each list for the copied elements plus the one being appended.
    List<string> extendedOutcomes = new List<string>(sequenceToCopy.Outcomes.Count + 1);
    extendedOutcomes.AddRange(sequenceToCopy.Outcomes);
    extendedOutcomes.Add(outcome);

    List<double> extendedProbabilities = new List<double>(sequenceToCopy.Probabilities.Count + 1);
    extendedProbabilities.AddRange(sequenceToCopy.Probabilities);
    extendedProbabilities.Add(probability);

    _outcomes = extendedOutcomes;
    _probabilities = extendedProbabilities;

    // The score accumulates log-probabilities.
    _score = sequenceToCopy.Score + System.Math.Log(probability);
}
// Methods --------------------------

/// <summary>
/// Runs the beam search over the given tokens, remembers the winning sequence
/// in mBestSequence, and returns its outcomes.
/// </summary>
/// <param name="tokens">
/// The tokens passed to the beam search as its input sequence.
/// </param>
/// <param name="previousTags">
/// Passed to the beam search as the single element of its additional context array.
/// </param>
/// <returns>
/// A new array containing the outcomes of the best sequence.
/// </returns>
public virtual string[] Find(string[] tokens, IDictionary previousTags)
{
    object[] additionalContext = new object[] { previousTags };
    mBestSequence = mBeam.BestSequence(tokens, additionalContext);

    string[] outcomes = mBestSequence.Outcomes.ToArray();
    return outcomes;
}