///<summary>
 ///Inserts the specified constituent into this parse based on its text span.
 ///Children of this parse that the constituent covers are re-parented under it;
 ///if an existing child fully covers the constituent, insertion recurses into
 ///that child instead.
 ///</summary>
 ///<param name="constituent">
 ///The constituent to be inserted. Its span must lie within this parse's span.
 ///</param>
 ///<exception cref="ParseException">
 ///Thrown when the constituent's span is not contained in this parse's span.
 ///</exception>
 public virtual void Insert(Parse constituent)
 {
     Util.Span constituentSpan = constituent.mSpan;
     if (!mSpan.Contains(constituentSpan))
     {
         throw new ParseException("Inserting constituent not contained in the sentence!");
     }

     int insertionIndex = 0;
     while (insertionIndex < mParts.Count)
     {
         Parse child = mParts[insertionIndex];
         Util.Span childSpan = child.mSpan;

         if (childSpan.Start > constituentSpan.End)
         {
             // All remaining children start past the constituent; insert here.
             break;
         }

         if (constituentSpan.Contains(childSpan))
         {
             // The new constituent swallows this child: move it under the
             // constituent. Do not advance — the same index now holds the
             // next child.
             mParts.RemoveAt(insertionIndex);
             constituent.mParts.Add(child);
             child.Parent = constituent;
         }
         else if (childSpan.Contains(constituentSpan))
         {
             // An existing child fully covers the constituent; it belongs
             // somewhere inside that child.
             child.Insert(constituent);
             return;
         }
         else
         {
             insertionIndex++;
         }
     }

     mParts.Insert(insertionIndex, constituent);
     constituent.Parent = this;
 }
        /// <summary>
        /// Generates split/no-split training events for one sentence.
        /// Whitespace-delimited candidate tokens are scanned for the annotated
        /// token spans they contain, and one event is emitted per interior
        /// character boundary of each contained token.
        /// </summary>
        /// <param name="tokens">
        /// Annotated token spans, in text order, as offsets into <paramref name="input"/>.
        /// </param>
        /// <param name="input">
        /// The text the token spans refer to.
        /// </param>
        public virtual void AddEvents(Util.Span[] tokens, string input)
        {
            if (tokens.Length == 0)
            {
                return;
            }

            int sentenceStart = tokens[0].Start;
            int sentenceEnd = tokens[tokens.Length - 1].End;
            string sentence = input.Substring(sentenceStart, sentenceEnd - sentenceStart);
            Util.Span[] candidateTokens = MaximumEntropyTokenizer.SplitOnWhitespaces(sentence);
            int firstTrainingToken = -1;
            int lastTrainingToken = -1;

            foreach (Util.Span rawSpan in candidateTokens)
            {
                string candidateToken = sentence.Substring(rawSpan.Start, rawSpan.End - rawSpan.Start);
                // Re-base the candidate span from sentence-relative to input-relative offsets.
                Util.Span candidateSpan = new Util.Span(rawSpan.Start + sentenceStart, rawSpan.End + sentenceStart);

                // Single-character candidates carry no split decision; optionally
                // skip purely alphanumeric tokens as well.
                bool skip = candidateToken.Length <= 1
                    || (mSkipAlphanumerics && MaximumEntropyTokenizer.AlphaNumeric.IsMatch(candidateToken));
                if (skip)
                {
                    continue;
                }

                // Locate the run of annotated tokens lying inside this candidate.
                bool foundTrainingTokens = false;
                for (int tokenIndex = lastTrainingToken + 1; tokenIndex < tokens.Length; tokenIndex++)
                {
                    if (candidateSpan.Contains(tokens[tokenIndex]))
                    {
                        if (!foundTrainingTokens)
                        {
                            firstTrainingToken = tokenIndex;
                            foundTrainingTokens = true;
                        }
                        lastTrainingToken = tokenIndex;
                    }
                    else if (candidateSpan.End < tokens[tokenIndex].End)
                    {
                        // Token extends past the candidate; later tokens can't be inside it.
                        break;
                    }
                    else if (tokens[tokenIndex].End < candidateSpan.Start)
                    {
                        // Token lies entirely before the candidate; keep scanning.
                    }
                    else
                    {
                        // Token straddles the candidate boundary — inconsistent annotation.
                        throw new ApplicationException("Bad training token: " + tokens[tokenIndex] + " cand: " + candidateSpan);
                    }
                }

                if (!foundTrainingTokens)
                {
                    continue;
                }

                // Emit events: "no split" for every position strictly inside a
                // training token, "split" at a token end that is not also the
                // candidate's end.
                int candidateStart = candidateSpan.Start;
                for (int tokenIndex = firstTrainingToken; tokenIndex <= lastTrainingToken; tokenIndex++)
                {
                    Util.Span trainingTokenSpan = tokens[tokenIndex];
                    for (int position = trainingTokenSpan.Start + 1; position < trainingTokenSpan.End; position++)
                    {
                        string[] context = mContextGenerator.GetContext(new Tuple<string, int>(candidateToken, position - candidateStart));
                        mEvents.Add(new SharpEntropy.TrainingEvent(TokenContextGenerator.NoSplitIndicator, context));
                    }
                    if (trainingTokenSpan.End != candidateSpan.End)
                    {
                        string[] context = mContextGenerator.GetContext(new Tuple<string, int>(candidateToken, trainingTokenSpan.End - candidateStart));
                        mEvents.Add(new SharpEntropy.TrainingEvent(TokenContextGenerator.SplitIndicator, context));
                    }
                }
            }
        }
        /// <summary>
        /// Generates split/no-split training events for one sentence. Candidate
        /// tokens produced by the default split are scanned for the annotated
        /// token spans they contain, and one event is emitted per interior
        /// character boundary of each contained token.
        /// </summary>
        /// <param name="tokens">
        /// Annotated token spans, in text order, as offsets into <paramref name="input"/>.
        /// </param>
        /// <param name="input">
        /// The text the token spans refer to.
        /// </param>
        public virtual void AddEvents(Util.Span[] tokens, string input)
        {
            if (tokens.Length == 0)
            {
                return;
            }

            int textStart = tokens[0].Start;
            int textEnd = tokens[tokens.Length - 1].End;
            string sentence = input.Substring(textStart, textEnd - textStart);
            Util.Span[] candidates = MaximumEntropyTokenizer.Split(sentence);
            int firstTrainingToken = -1;
            int lastTrainingToken = -1;

            for (int candidateIndex = 0; candidateIndex < candidates.Length; candidateIndex++)
            {
                Util.Span localSpan = candidates[candidateIndex];
                string candidateToken = sentence.Substring(localSpan.Start, localSpan.End - localSpan.Start);
                // Re-base the span onto the full input text.
                Util.Span candidateSpan = new Util.Span(localSpan.Start + textStart, localSpan.End + textStart);

                // A single character carries no split decision.
                if (candidateToken.Length <= 1)
                {
                    continue;
                }
                // Optionally skip purely alphanumeric candidates.
                if (mSkipAlphanumerics && MaximumEntropyTokenizer.AlphaNumeric.IsMatch(candidateToken))
                {
                    continue;
                }

                // Locate the run of annotated tokens lying inside this candidate.
                bool foundTrainingTokens = false;
                int scanIndex = lastTrainingToken + 1;
                while (scanIndex < tokens.Length)
                {
                    Util.Span trainingSpan = tokens[scanIndex];
                    if (candidateSpan.Contains(trainingSpan))
                    {
                        if (!foundTrainingTokens)
                        {
                            firstTrainingToken = scanIndex;
                            foundTrainingTokens = true;
                        }
                        lastTrainingToken = scanIndex;
                    }
                    else if (candidateSpan.End < trainingSpan.End)
                    {
                        // Token extends past the candidate; later tokens can't be inside it.
                        break;
                    }
                    else if (trainingSpan.End < candidateSpan.Start)
                    {
                        // Token lies entirely before the candidate; keep scanning.
                    }
                    else
                    {
                        // Token straddles the candidate boundary — inconsistent annotation.
                        throw new ApplicationException("Bad training token: " + trainingSpan + " cand: " + candidateSpan);
                    }
                    scanIndex++;
                }

                if (!foundTrainingTokens)
                {
                    continue;
                }

                // Emit events: "no split" for every position strictly inside a
                // training token, "split" at a token end that is not also the
                // candidate's end.
                int candidateStart = candidateSpan.Start;
                for (int trainingIndex = firstTrainingToken; trainingIndex <= lastTrainingToken; trainingIndex++)
                {
                    Util.Span trainingTokenSpan = tokens[trainingIndex];
                    for (int position = trainingTokenSpan.Start + 1; position < trainingTokenSpan.End; position++)
                    {
                        string[] context = mContextGenerator.GetContext(new Util.Pair<string, int>(candidateToken, position - candidateStart));
                        mEvents.Add(new SharpEntropy.TrainingEvent(TokenContextGenerator.NoSplitIndicator, context));
                    }
                    if (trainingTokenSpan.End != candidateSpan.End)
                    {
                        string[] context = mContextGenerator.GetContext(new Util.Pair<string, int>(candidateToken, trainingTokenSpan.End - candidateStart));
                        mEvents.Add(new SharpEntropy.TrainingEvent(TokenContextGenerator.SplitIndicator, context));
                    }
                }
            }
        }