Пример #1
0
 /// <summary>Determines whether a chunk boundary (end) lies between two consecutive tokens.</summary>
 /// <remarks>
 /// When there is no previous token (<paramref name="prev"/> is null) no chunk can have ended,
 /// so the result is false. Otherwise the decision is delegated to the overload that takes the
 /// tag and type of both tokens.
 /// </remarks>
 /// <param name="prev">Label/tag/type of the previous token; may be null for the first token.</param>
 /// <param name="cur">Label/tag/type of the current token.</param>
 /// <returns>true if the previous token was the last token of a chunk; otherwise false.</returns>
 public static bool IsEndOfChunk(LabeledChunkIdentifier.LabelTagType prev, LabeledChunkIdentifier.LabelTagType cur)
 {
     // Short-circuit: cur is only dereferenced when a previous token exists.
     return prev != null && IsEndOfChunk(prev.tag, prev.type, cur.tag, cur.type);
 }
Пример #2
0
 /// <summary>Determines whether a chunk begins at the current token.</summary>
 /// <remarks>
 /// When there is no previous token (<paramref name="prev"/> is null), the previous tag and
 /// type are both taken to be "O". The decision itself is delegated to the overload that takes
 /// the tag and type of both tokens.
 /// </remarks>
 /// <param name="prev">Label/tag/type of the previous token; may be null for the first token.</param>
 /// <param name="cur">Label/tag/type of the current token.</param>
 /// <returns>true if the current token is the first token of a chunk; otherwise false.</returns>
 public static bool IsStartOfChunk(LabeledChunkIdentifier.LabelTagType prev, LabeledChunkIdentifier.LabelTagType cur)
 {
     return prev == null
         ? IsStartOfChunk("O", "O", cur.tag, cur.type)
         : IsStartOfChunk(prev.tag, prev.type, cur.tag, cur.type);
 }
Пример #3
0
        /// <summary>Finds chunks in a token sequence and returns one annotation per chunk.</summary>
        /// <remarks>
        /// Walks the tokens once, using the label stored under <paramref name="labelKey"/> to detect
        /// where chunks start and end. Each chunk is built by
        /// <c>ChunkAnnotationUtils.GetAnnotatedChunk</c> and then has <paramref name="labelKey"/> set
        /// to the type of the chunk's last token. Per the original documentation, the chunk built by
        /// that helper carries the following annotations:
        /// CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk
        /// CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk
        /// TokensAnnotation - List of tokens in this chunk
        /// TokenBeginAnnotation - Index of first token in chunk (index in original list of tokens)
        /// TokenEndAnnotation - Index of last token in chunk (index in original list of tokens)
        /// TextAnnotation - String representing tokens in this chunk (token text separated by space)
        /// </remarks>
        /// <param name="tokens">List of tokens to look for chunks in.</param>
        /// <param name="totalTokensOffset">Index of tokens to offset by (forwarded to the chunk builder).</param>
        /// <param name="textKey">Key to use to find the token text.</param>
        /// <param name="labelKey">Key to use to find the token label (to determine if inside a chunk or not); also set on every returned chunk.</param>
        /// <param name="tokenChunkKey">If not null, each token is annotated with the chunk using this key.</param>
        /// <param name="tokenLabelKey">If not null, each token is annotated with the text associated with the chunk using this key.</param>
        /// <param name="checkTokensCompatible">
        /// If not null, an additional check on the pair (current token, previous token); the previous
        /// token is null for the first token. A false result forces a chunk break before the current token.
        /// </param>
        /// <returns>List of annotations (each as an ICoreMap) representing the chunks of tokens.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
        /// <exception cref="InvalidOperationException">A new chunk starts while the previous one is still open.</exception>
        public virtual IList <ICoreMap> GetAnnotatedChunks(IList <CoreLabel> tokens, int totalTokensOffset, Type textKey, Type labelKey, Type tokenChunkKey, Type tokenLabelKey, IPredicate <Pair <CoreLabel, CoreLabel> > checkTokensCompatible)
        {
            if (tokens == null)
            {
                throw new ArgumentNullException("tokens");
            }

            // A generic list is required here: the non-generic ArrayList is not an IList<ICoreMap>.
            IList <ICoreMap> chunks = new List <ICoreMap>();

            LabeledChunkIdentifier.LabelTagType prevTagType = null;
            // Index of the first token of the currently open chunk, or -1 when no chunk is open.
            int tokenBegin = -1;

            for (int i = 0; i < tokens.Count; i++)
            {
                CoreLabel token = tokens[i];
                string    label = (string)token.Get(labelKey);
                LabeledChunkIdentifier.LabelTagType curTagType = GetTagType(label);

                bool isCompatible = true;
                if (checkTokensCompatible != null)
                {
                    // The pair is (current, previous); previous is null for the first token.
                    CoreLabel prev = i > 0 ? tokens[i - 1] : null;
                    Pair <CoreLabel, CoreLabel> p = Pair.MakePair(token, prev);
                    isCompatible = checkTokensCompatible.Test(p);
                }

                // Close the open chunk if the labels say it ended or the tokens are incompatible.
                if (IsEndOfChunk(prevTagType, curTagType) || !isCompatible)
                {
                    int tokenEnd = i;
                    if (tokenBegin >= 0 && tokenEnd > tokenBegin)
                    {
                        ICoreMap chunk = ChunkAnnotationUtils.GetAnnotatedChunk(tokens, tokenBegin, tokenEnd, totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey);
                        // tokenBegin >= 0 implies at least one earlier iteration, so prevTagType is set.
                        chunk.Set(labelKey, prevTagType.type);
                        chunks.Add(chunk);
                        tokenBegin = -1;
                    }
                }

                // Open a new chunk at the current token; an incompatibility break restarts a chunk
                // only when the current token itself belongs to one.
                if (IsStartOfChunk(prevTagType, curTagType) || (!isCompatible && IsChunk(curTagType)))
                {
                    if (tokenBegin >= 0)
                    {
                        throw new InvalidOperationException("New chunk started, prev chunk not ended yet!");
                    }
                    tokenBegin = i;
                }
                prevTagType = curTagType;
            }

            // Flush a chunk that runs to the end of the token list.
            if (tokenBegin >= 0)
            {
                ICoreMap chunk = ChunkAnnotationUtils.GetAnnotatedChunk(tokens, tokenBegin, tokens.Count, totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey);
                chunk.Set(labelKey, prevTagType.type);
                chunks.Add(chunk);
            }
            return(chunks);
        }
Пример #4
0
 /// <summary>Checks whether this instance and another have equal types.</summary>
 /// <param name="other">The label/tag/type to compare against; must not be null.</param>
 /// <returns>true if this instance's type equals <paramref name="other"/>'s type; otherwise false.</returns>
 public virtual bool TypeMatches(LabeledChunkIdentifier.LabelTagType other)
 {
     bool sameType = type.Equals(other.type);
     return sameType;
 }
Пример #5
0
 /// <summary>Tells whether a token's tag marks it as part of a chunk.</summary>
 /// <param name="cur">Label/tag/type of the token to inspect; must not be null.</param>
 /// <returns>false when the tag is "O" or "."; true for any other tag.</returns>
 private static bool IsChunk(LabeledChunkIdentifier.LabelTagType cur)
 {
     if ("O".Equals(cur.tag))
     {
         return false;
     }
     return !".".Equals(cur.tag);
 }