/// <summary>
        /// Builds the entity whose first word is at <paramref name="position"/>.
        /// </summary>
        /// <remarks>
        /// The word at <paramref name="position"/> is always taken as the first word.
        /// The entity is then extended over each following position whose label,
        /// looked up in <c>classIndex</c> and split on "-", has the prefix "I" and the
        /// type <paramref name="tag"/>; scanning stops at the first label that does not.
        /// The entity's <c>otherOccurrences</c> are computed before returning.
        /// This method does not store the entity in the <c>entities</c> array itself.
        /// </remarks>
        /// <param name="sequence">Label ids, one per position.</param>
        /// <param name="position">Index of the entity's first word.</param>
        /// <param name="tag">Entity type name (the part of the label after the "-").</param>
        /// <returns>The newly built entity.</returns>
        public virtual EntityBIO ExtractEntity(int[] sequence, int position, string tag)
        {
            EntityBIO entity = new EntityBIO
            {
                type          = tagIndex.IndexOf(tag),
                startPosition = position,
                words         = new List <string> { wordDoc[position] }
            };

            int cursor = position + 1;
            while (cursor < sequence.Length)
            {
                string[] labelParts = classIndex.Get(sequence[cursor]).Split("-");
                // Short-circuit keeps labels without a "-" (single part) from indexing labelParts[1].
                bool continuesEntity = labelParts[0].Equals("I") && labelParts[1].Equals(tag);
                if (!continuesEntity)
                {
                    break;
                }
                entity.words.Add(wordDoc[cursor]);
                cursor++;
            }

            entity.otherOccurrences = OtherOccurrences(entity);
            return entity;
        }
 /// <summary>
 /// Points every slot of <c>entities</c> covered by <paramref name="entity"/>
 /// (from its <c>startPosition</c>, one slot per word) at that entity.
 /// </summary>
 private void AddEntityToEntitiesArray(EntityBIO entity)
 {
     int slot = entity.startPosition;
     int end  = slot + entity.words.Count;

     while (slot < end)
     {
         entities[slot] = entity;
         slot++;
     }
 }
        /// <summary>
        /// Finds other locations in the document where the sequence of
        /// words in this entity occurs.
        /// </summary>
        /// <remarks>
        /// Every position of <c>wordDoc</c> except the entity's own
        /// <c>startPosition</c> is tested with <see cref="Matches"/>.
        /// </remarks>
        /// <param name="entity">Entity whose word sequence is searched for.</param>
        /// <returns>The matching start positions, in ascending order.</returns>
        public virtual int[] OtherOccurrences(EntityBIO entity)
        {
            IList <int> other = new List <int>();

            for (int i = 0; i < wordDoc.Count; i++)
            {
                // Skip the entity's own location; only *other* occurrences are wanted.
                if (i != entity.startPosition && Matches(entity, i))
                {
                    // The index is already an int; int.Parse only accepts text.
                    other.Add(i);
                }
            }
            return ToArray(other);
        }
        /// <summary>
        /// Tests whether the words of <paramref name="entity"/> appear in <c>wordDoc</c>
        /// starting at <paramref name="position"/>.
        /// </summary>
        /// <remarks>
        /// Words are compared pairwise with <c>Sharpen.Runtime.EqualsIgnoreCase</c>.
        /// Running past the end of <c>wordDoc</c> before all words are compared
        /// counts as a mismatch.
        /// </remarks>
        /// <param name="entity">Entity providing the words to look for.</param>
        /// <param name="position">Index in <c>wordDoc</c> where the comparison starts.</param>
        /// <returns><c>true</c> if every word matches, otherwise <c>false</c>.</returns>
        public virtual bool Matches(EntityBIO entity, int position)
        {
            if (!Sharpen.Runtime.EqualsIgnoreCase(wordDoc[position], entity.words[0]))
            {
                return false;
            }

            for (int offset = 1; offset < entity.words.Count; offset++)
            {
                int docIndex = position + offset;
                if (docIndex >= wordDoc.Count)
                {
                    return false;
                }
                if (!Sharpen.Runtime.EqualsIgnoreCase(wordDoc[docIndex], entity.words[offset]))
                {
                    return false;
                }
            }
            return true;
        }
 /// <summary>
 /// Stores <paramref name="initialSequence"/> as the current sequence and rebuilds
 /// the <c>entities</c> array from scratch.
 /// </summary>
 /// <remarks>
 /// An entity is extracted at every non-background position whose label has the
 /// prefix "B"; the scan then resumes after the last word of that entity.
 /// </remarks>
 /// <param name="initialSequence">Label ids, one per position.</param>
 public virtual void SetInitialSequence(int[] initialSequence)
 {
     this.sequence = initialSequence;
     // A freshly allocated array of references starts out all null, so no explicit fill is needed.
     entities = new EntityBIO[initialSequence.Length];

     for (int idx = 0; idx < initialSequence.Length; idx++)
     {
         if (initialSequence[idx] == backgroundSymbol)
         {
             continue;
         }

         string[] labelParts = classIndex.Get(sequence[idx]).Split("-");
         //TODO(mengqiu) this needs to be updated, so that initial can be I as well
         if (!labelParts[0].Equals("B"))
         {
             continue;
         }

         // B-xxx: extract the whole entity, then skip its remaining words
         // (the loop increment moves past the last one).
         EntityBIO found = ExtractEntity(initialSequence, idx, labelParts[1]);
         AddEntityToEntitiesArray(found);
         idx += found.words.Count - 1;
     }
 }
 /// <summary>
 /// Repairs the cached <c>entities</c> array after the label at
 /// <paramref name="position"/> changed from <paramref name="oldVal"/> to
 /// <c>sequence[position]</c>.
 /// </summary>
 /// <remarks>
 /// Labels are looked up in <c>classIndex</c> and split on "-" into a prefix
 /// ("B" or "I") and an entity type; <c>backgroundSymbol</c> is the non-entity label.
 /// Nothing besides storing <paramref name="sequence"/> happens when the label did not change.
 /// </remarks>
 /// <param name="sequence">The label sequence after the change.</param>
 /// <param name="position">Index whose label changed.</param>
 /// <param name="oldVal">Label id that was at <paramref name="position"/> before the change.</param>
 /// <exception cref="InvalidOperationException">
 /// The old label was a "B" label that changed to background, but no entity is cached at
 /// <paramref name="position"/>.
 /// </exception>
 public virtual void UpdateSequenceElement(int[] sequence, int position, int oldVal)
 {
     this.sequence = sequence;
     if (sequence[position] == oldVal)
     {
         return;
     }
     if (Verbose)
     {
         log.Info("changing position " + position + " from " + classIndex.Get(oldVal) + " to " + classIndex.Get(sequence[position]));
     }

     if (sequence[position] == backgroundSymbol)
     {
         // new tag is O, so the old tag was B-xxx or I-xxx
         string[] oldParts = classIndex.Get(oldVal).Split("-");
         if (oldParts[0].Equals("B"))
         {
             // old tag was a B: the entity starting here disappears entirely
             EntityBIO removed = entities[position];
             if (removed == null)
             {
                 throw new InvalidOperationException("oldTag starts with B, entity at position should not be null");
             }
             ClearEntitySlots(position, removed.words.Count);
         }
         else if (entities[position] != null)
         {
             // old tag was an I inside an entity: that entity is shortened
             if (Verbose)
             {
                 log.Info("splitting off prev entity");
             }
             TruncateEntityAt(position);
             if (Verbose && position > 0)
             {
                 log.Info("position:" + position + ", entities[position-1] = " + entities[position - 1].ToString(tagIndex));
             }
         }
         // otherwise a non-entity I-xxx became O: no entity affected
         return;
     }

     string[] parts = classIndex.Get(sequence[position]).Split("-");
     bool     oldWasBackground = oldVal == backgroundSymbol;

     if (parts[0].Equals("B"))
     {
         // new tag is B: whatever used to cover this position is cleaned up first,
         // then a new entity is extracted here (it may merge with following I-xxx words)
         if (!oldWasBackground)
         {
             string[] previousParts = classIndex.Get(oldVal).Split("-");
             if (previousParts[0].Equals("B"))
             {
                 // was a different B-xxx; a single-word entity is simply overwritten below
                 EntityBIO replaced = entities[position];
                 if (replaced.words.Count > 1)
                 {
                     ClearEntitySlots(position, replaced.words.Count);
                 }
             }
             else if (entities[position] != null)
             {
                 // was an I inside an entity: break the old entity at this position
                 TruncateEntityAt(position);
             }
         }
         EntityBIO started = ExtractEntity(sequence, position, parts[1]);
         AddEntityToEntitiesArray(started);
         return;
     }

     // new tag is I
     if (oldWasBackground)
     {
         // check if the previous entity now extends into this position
         ReextractEntityEndingBefore(sequence, position, false);
         return;
     }

     string[] formerParts = classIndex.Get(oldVal).Split("-");
     if (formerParts[0].Equals("B"))
     {
         // was a B: clean the B entity first, then check if the previous one extends
         EntityBIO dropped = entities[position];
         ClearEntitySlots(position, dropped.words.Count);
         ReextractEntityEndingBefore(sequence, position, true);
     }
     else if (entities[position] != null)
     {
         // was a different I-xxx inside an entity: shorten it, remove the remaining parts
         TruncateEntityAt(position);
     }
     else
     {
         // was a different I-xxx outside any entity: re-calc the previous entity if it exists
         ReextractEntityEndingBefore(sequence, position, false);
     }
 }

 /// <summary>Sets <paramref name="count"/> consecutive slots of <c>entities</c>, starting at <paramref name="start"/>, to null.</summary>
 private void ClearEntitySlots(int start, int count)
 {
     for (int i = 0; i < count; i++)
     {
         entities[start + i] = null;
     }
 }

 /// <summary>
 /// Shortens the entity cached at <paramref name="position"/> so that it ends just before
 /// that position, recomputes its other occurrences, and clears the slots it no longer covers.
 /// </summary>
 private void TruncateEntityAt(int position)
 {
     EntityBIO      oldEntity = entities[position];
     int            oldLen    = oldEntity.words.Count;
     int            offset    = position - oldEntity.startPosition;
     IList <string> kept      = new List <string>();
     for (int i = 0; i < offset; i++)
     {
         kept.Add(oldEntity.words[i]);
     }
     oldEntity.words            = kept;
     oldEntity.otherOccurrences = OtherOccurrences(oldEntity);
     // the tail [position, old end) no longer belongs to any entity
     ClearEntitySlots(position, oldLen - offset);
 }

 /// <summary>
 /// If an entity is cached at <c>position - 1</c>, extracts it again from its first word
 /// so that it can grow over newly matching I-xxx labels, and stores the result.
 /// </summary>
 private void ReextractEntityEndingBefore(int[] sequence, int position, bool logPrevious)
 {
     if (position <= 0)
     {
         return;
     }
     EntityBIO previous = entities[position - 1];
     if (previous == null)
     {
         return;
     }
     string oldTag = tagIndex.Get(previous.type);
     if (logPrevious && Verbose)
     {
         log.Info("position:" + position + ", entities[position-1] = " + previous.ToString(tagIndex));
     }
     // position - 1 is treated as the previous entity's last word, so this is its first word
     EntityBIO entity = ExtractEntity(sequence, position - 1 - previous.words.Count + 1, oldTag);
     AddEntityToEntitiesArray(entity);
 }
        /// <summary>
        /// Computes a score from the cached <c>entities</c> array by comparing each entity
        /// with the entities found at its other occurrences.
        /// </summary>
        /// <remarks>
        /// <paramref name="sequence"/> is not read here; only the cached entities are used.
        /// For every pair, the contribution is the entity length times a factor taken from
        /// <c>entityMatrix</c> (exact match) or <c>subEntityMatrix</c> (otherwise), overridden
        /// when both entities have the same type. Exact matches of differing length add an
        /// extra <c>p1</c>-weighted term.
        /// </remarks>
        /// <param name="sequence">Unused by this implementation.</param>
        /// <returns>The accumulated score.</returns>
        public override double ScoreOf(int[] sequence)
        {
            double total = 0.0;

            for (int start = 0; start < entities.Length; start++)
            {
                EntityBIO current = entities[start];
                // Each entity is scored once, at the first slot that refers to it.
                bool isEntityStart = (start == 0 || entities[start - 1] != current) && current != null;
                if (!isEntityStart)
                {
                    continue;
                }

                int length = current.words.Count;
                int tag1   = current.type;

                foreach (int otherOccurrence in current.otherOccurrences)
                {
                    // First cached entity overlapping the other occurrence, if any.
                    EntityBIO otherEntity = null;
                    int       scanEnd     = otherOccurrence + length;
                    for (int k = otherOccurrence; otherEntity == null && k < scanEnd && k < entities.Length; k++)
                    {
                        otherEntity = entities[k];
                    }
                    if (otherEntity == null)
                    {
                        // nothing tagged at the other occurrence
                        continue;
                    }

                    int oLength = otherEntity.words.Count;
                    int tag2    = otherEntity.type;

                    // "Exact" when the other entity lists a position inside this entity
                    // among its own other occurrences.
                    bool exact = false;
                    int  last  = start + length - 1;
                    foreach (int index in otherEntity.otherOccurrences)
                    {
                        if (index >= start && index <= last)
                        {
                            exact = true;
                            break;
                        }
                    }

                    double factor;
                    if (exact)
                    {
                        if (Debug)
                        {
                            log.Info("Exact match of tag1=" + tagIndex.Get(tag1) + ", tag2=" + tagIndex.Get(tag2));
                        }
                        if (length != oLength)
                        {
                            // entity not complete
                            bool orgLocMix = (tag1 == ORGIndex && tag2 == LOCIndex) || (tag1 == LOCIndex && tag2 == ORGIndex);
                            if (tag1 == tag2)
                            {
                                total += Math.Abs(oLength - length) * p1;
                            }
                            else if (!orgLocMix)
                            {
                                total += (oLength + length) * p1;
                            }
                        }
                        factor = entityMatrix[tag1][tag2];
                    }
                    else
                    {
                        if (Debug)
                        {
                            log.Info("Sub  match of tag1=" + tagIndex.Get(tag1) + ", tag2=" + tagIndex.Get(tag2));
                        }
                        factor = subEntityMatrix[tag1][tag2];
                    }

                    if (tag1 == tag2)
                    {
                        // Same type on both sides replaces the matrix value.
                        factor = flags.matchNERIncentive ? p2 : 0;
                    }

                    if (Debug)
                    {
                        log.Info(" of factor=" + factor + ", p += " + (length * factor));
                    }
                    total += length * factor;
                }
            }
            return total;
        }