예제 #1
0
		/// <summary>
		/// Initializes a sequence as a copy of another sequence: its outcomes,
		/// its probabilities and its score.
		/// </summary>
		/// <param name="sequenceToCopy">
		/// The sequence whose contents are copied into the new instance.
		/// </param>
		public Sequence(Sequence sequenceToCopy)
		{
			_score = sequenceToCopy.Score;

			// Both lists are allocated with a capacity one larger than the number of copied items.
			int outcomeCapacity = sequenceToCopy.Outcomes.Count + 1;
			_outcomes = new List<string>(outcomeCapacity);
			_outcomes.AddRange(sequenceToCopy.Outcomes);

			int probabilityCapacity = sequenceToCopy.Probabilities.Count + 1;
			_probabilities = new List<double>(probabilityCapacity);
			_probabilities.AddRange(sequenceToCopy.Probabilities);
		}
예제 #2
0
 /// <summary>
 /// Determines whether a particular continuation of a sequence is valid.
 /// This is used to restrict invalid sequences such as those used in start/continue tag-based chunking,
 /// or could be used to implement tag dictionary restrictions.
 /// This base implementation accepts every continuation; derived classes override it to add constraints.
 /// </summary>
 /// <param name="index">
 /// The index in the input sequence for which the new outcome is being proposed.
 /// </param>
 /// <param name="inputSequence">
 /// The input sequence.
 /// </param>
 /// <param name="outcomesSequence">
 /// The outcomes so far in this sequence.
 /// </param>
 /// <param name="outcome">
 /// The next proposed outcome for the outcomes sequence.
 /// </param>
 /// <returns>
 /// true if the sequence would still be valid with the new outcome, false otherwise.
 /// This implementation always returns true.
 /// </returns>
 protected internal virtual bool ValidSequence(int index, ArrayList inputSequence, Sequence outcomesSequence, string outcome)
 {
     return true;
 }
예제 #3
0
        /// <summary>
        /// Returns the best sequences of outcomes for the input, based on the model for this object.
        /// </summary>
        /// <param name="numSequences">
        /// The maximum number of sequences to be returned.
        /// </param>
        /// <param name="sequence">
        /// The input sequence.
        /// </param>
        /// <param name="additionalContext">
        /// An object[] of additional context. This is passed to the context generator blindly, with the
        /// assumption that the context is appropriate. When null, <c>mEmptyAdditionalContext</c> is used instead.
        /// </param>
        /// <param name="minSequenceScore">
        /// A lower bound on the score of a returned sequence; only sequences scoring strictly above it are kept.
        /// </param>
        /// <returns>
        /// An array of at most <paramref name="numSequences"/> sequences of outcomes, in the order in which
        /// the heap extracts them.
        /// </returns>
        public virtual Sequence[] BestSequences(int numSequences, object[] sequence, object[] additionalContext, double minSequenceScore)
        {
            int sequenceCount = sequence.Length;
            ListHeap<Sequence> previousHeap = new ListHeap<Sequence>(Size);
            ListHeap<Sequence> nextHeap = new ListHeap<Sequence>(Size);

            // Seed the search with a single empty sequence.
            previousHeap.Add(new Sequence());
            if (additionalContext == null)
            {
                additionalContext = mEmptyAdditionalContext;
            }

            for (int currentSequence = 0; currentSequence < sequenceCount; currentSequence++)
            {
                // Expand at most "Size" of the partial sequences carried over from the previous position.
                int sz = System.Math.Min(Size, previousHeap.Size);
                for (int sc = 0; previousHeap.Size > 0 && sc < sz; sc++)
                {
                    Sequence topSequence = previousHeap.Extract();
                    string[] outcomes = topSequence.Outcomes.ToArray();
                    string[] contexts = ContextGenerator.GetContext(currentSequence, sequence, outcomes, additionalContext);

                    double[] scores;
                    if (mContextsCache != null)
                    {
                        // NOTE(review): the cache is keyed on the contexts array instance itself; unless the
                        // cache compares array contents, lookups will only hit for the same instance - confirm.
                        scores = (double[]) mContextsCache[contexts];
                        if (scores == null)
                        {
                            scores = Model.Evaluate(contexts, mProbabilities);
                            mContextsCache[contexts] = scores;
                        }
                    }
                    else
                    {
                        scores = Model.Evaluate(contexts, mProbabilities);
                    }

                    // Sort a copy so that "scores" keeps its outcome-index order; the cut-off is the
                    // Size-th largest score (or the smallest one when there are fewer than Size scores).
                    double[] sortedScores = new double[scores.Length];
                    Array.Copy(scores, sortedScores, scores.Length);
                    Array.Sort(sortedScores);
                    double minimum = sortedScores[System.Math.Max(0, scores.Length - Size)];

                    // First pass: only advance the outcomes that reach the cut-off.
                    AdvanceSequence(currentSequence, sequence, topSequence, outcomes, scores, minimum, minSequenceScore, nextHeap);

                    if (nextHeap.Size == 0)
                    {
                        // Nothing has been advanced so far: advance every valid outcome instead.
                        // No score is below negative infinity, so the cut-off is effectively disabled.
                        AdvanceSequence(currentSequence, sequence, topSequence, outcomes, scores, double.NegativeInfinity, minSequenceScore, nextHeap);
                    }
                }

                // Make previous = next, and reuse the cleared previous heap as the new (empty) next heap.
                previousHeap.Clear();
                ListHeap<Sequence> tempHeap = previousHeap;
                previousHeap = nextHeap;
                nextHeap = tempHeap;
            }

            int topSequenceCount = System.Math.Min(numSequences, previousHeap.Size);
            Sequence[] topSequences = new Sequence[topSequenceCount];
            for (int sequenceIndex = 0; sequenceIndex < topSequenceCount; sequenceIndex++)
            {
                topSequences[sequenceIndex] = previousHeap.Extract();
            }
            return topSequences;
        }

        /// <summary>
        /// Extends <paramref name="parent"/> with every outcome whose score is not below
        /// <paramref name="scoreFloor"/> and which passes <c>ValidSequence</c>, adding each extension whose
        /// score is strictly greater than <paramref name="minSequenceScore"/> to <paramref name="target"/>.
        /// </summary>
        private void AdvanceSequence(int index, object[] sequence, Sequence parent, string[] outcomes, double[] scores, double scoreFloor, double minSequenceScore, ListHeap<Sequence> target)
        {
            for (int currentScore = 0; currentScore < scores.Length; currentScore++)
            {
                if (scores[currentScore] < scoreFloor)
                {
                    continue;
                }

                string outcomeName = Model.GetOutcomeName(currentScore);
                if (ValidSequence(index, sequence, outcomes, outcomeName))
                {
                    Sequence newSequence = new Sequence(parent, outcomeName, scores[currentScore]);
                    if (newSequence.Score > minSequenceScore)
                    {
                        target.Add(newSequence);
                    }
                }
            }
        }
 /// <summary>
 /// Adapter overload: unpacks the outcomes held by <paramref name="outcomesSequence"/> and the first
 /// element of <paramref name="additionalContext"/>, then forwards to the GetContext overload that
 /// takes them directly.
 /// </summary>
 /// <param name="index">The index passed through to the forwarded call.</param>
 /// <param name="sequence">The sequence passed through to the forwarded call.</param>
 /// <param name="outcomesSequence">The sequence whose Outcomes are passed on.</param>
 /// <param name="additionalContext">
 /// Additional context; element 0 is cast to IDictionary&lt;string, string&gt; and passed on.
 /// </param>
 /// <returns>The contexts returned by the forwarded call.</returns>
 public virtual string[] GetContext(int index, List<string> sequence, Sequence outcomesSequence, object[] additionalContext)
 {
     var priorOutcomes = outcomesSequence.Outcomes;
     IDictionary<string, string> firstContextEntry = (IDictionary<string, string>) additionalContext[0];
     return GetContext(index, sequence, priorOutcomes, firstContextEntry);
 }
 /// <summary>
 /// Decides whether <paramref name="outcome"/> may follow <paramref name="outcomeSequence"/> by
 /// delegating to the name finder's ValidOutcome check. The index and the input sequence are not consulted.
 /// </summary>
 /// <returns>The result of the name finder's ValidOutcome call.</returns>
 protected internal override bool ValidSequence(int index, ArrayList sequence, Sequence outcomeSequence, string outcome)
 {
     bool isAllowed = mNameFinder.ValidOutcome(outcome, outcomeSequence);
     return isAllowed;
 }
 /// <summary>
 /// Determines whether the outcome is valid for the preceding sequence.
 /// This can be used to implement constraints on which sequences are valid.
 /// A Continue outcome is rejected when there is no preceding outcome or when the
 /// preceding outcome is Other; every other outcome is accepted.
 /// </summary>
 /// <param name="outcome">
 /// The proposed outcome.
 /// </param>
 /// <param name="sequence">
 /// The preceding sequence of outcome assignments.
 /// </param>
 /// <returns>
 /// true if the outcome is valid for the sequence, false otherwise.
 /// </returns>
 protected internal virtual bool ValidOutcome(string outcome, Sequence sequence)
 {
     // Only Continue outcomes are constrained.
     if (outcome != Continue)
     {
         return true;
     }

     string[] precedingTags = sequence.Outcomes.ToArray();
     if (precedingTags.Length == 0)
     {
         // There is nothing to continue.
         return false;
     }

     return precedingTags[precedingTags.Length - 1] != Other;
 }
 /// <summary>
 /// Runs the beam search over <paramref name="tokens"/>, stores the result in mBestSequence
 /// and returns its outcomes as a string array.
 /// </summary>
 /// <param name="tokens">The tokens handed to the beam search.</param>
 /// <param name="previousTags">Passed to the beam search as the only element of the additional context.</param>
 /// <returns>The outcomes of the best sequence found.</returns>
 public virtual string[] Find(object[] tokens, IDictionary previousTags)
 {
     object[] additionalContext = new object[] { previousTags };
     mBestSequence = mBeam.BestSequence(tokens, additionalContext);
     return (string[]) new ArrayList(mBestSequence.Outcomes).ToArray(typeof(string));
 }
 /// <summary>
 /// Runs the beam search over <paramref name="tokens"/>, stores the result in mBestSequence
 /// and returns its outcomes in a new ArrayList.
 /// </summary>
 /// <param name="tokens">The tokens handed to the beam search.</param>
 /// <param name="previousTags">Passed to the beam search as the only element of the additional context.</param>
 /// <returns>A new ArrayList holding the outcomes of the best sequence found.</returns>
 public virtual ArrayList Find(ArrayList tokens, IDictionary previousTags)
 {
     object[] additionalContext = new object[] { previousTags };
     mBestSequence = mBeam.BestSequence(tokens, additionalContext);
     ArrayList outcomes = new ArrayList(mBestSequence.Outcomes);
     return outcomes;
 }
예제 #9
0
		/// <summary>
		/// Initializes a sequence that extends an existing sequence by one outcome.
		/// </summary>
		/// <param name="sequenceToCopy">
		/// The sequence whose outcomes and probabilities form the start of the new sequence.
		/// </param>
		/// <param name="outcome">
		/// The outcome appended after the copied outcomes.
		/// </param>
		/// <param name="probability">
		/// The probability appended after the copied probabilities; its natural logarithm
		/// is added to the copied sequence's score.
		/// </param>
		public Sequence(Sequence sequenceToCopy, string outcome, double probability)
		{
			// The score is the copied score plus the log of the new probability.
			_score = sequenceToCopy.Score + System.Math.Log(probability);

			// Each list is sized for the copied items plus the one being appended.
			int outcomeCapacity = sequenceToCopy.Outcomes.Count + 1;
			_outcomes = new List<string>(outcomeCapacity);
			_outcomes.AddRange(sequenceToCopy.Outcomes);
			_outcomes.Add(outcome);

			int probabilityCapacity = sequenceToCopy.Probabilities.Count + 1;
			_probabilities = new List<double>(probabilityCapacity);
			_probabilities.AddRange(sequenceToCopy.Probabilities);
			_probabilities.Add(probability);
		}
예제 #10
0
        // Methods --------------------------
		
		/// <summary>
		/// Runs the beam search over <paramref name="tokens"/>, stores the result in mBestSequence
		/// and returns its outcomes as an array.
		/// </summary>
		/// <param name="tokens">The tokens handed to the beam search.</param>
		/// <param name="previousTags">Passed to the beam search as the only element of the additional context.</param>
		/// <returns>The outcomes of the best sequence found.</returns>
		public virtual string[] Find(string[] tokens, IDictionary previousTags)
		{
			object[] additionalContext = new object[] { previousTags };
			mBestSequence = mBeam.BestSequence(tokens, additionalContext);
			string[] outcomes = mBestSequence.Outcomes.ToArray();
			return outcomes;
		}