/// <summary>Tells whether a chunk boundary (an end) lies between two consecutive tokens.</summary>
/// <param name="prev">- the label/tag/type of the previous token; may be null when there is no previous token</param>
/// <param name="cur">- the label/tag/type of the current token</param>
/// <returns>
/// false when <paramref name="prev"/> is null; otherwise the result of the tag/type based
/// IsEndOfChunk overload applied to the tag and type of both tokens
/// </returns>
public static bool IsEndOfChunk(LabeledChunkIdentifier.LabelTagType prev, LabeledChunkIdentifier.LabelTagType cur)
{
    // Without a previous token no chunk can have ended; && short-circuits so the
    // overload (and any access to cur) is only reached when prev is present.
    return prev != null && IsEndOfChunk(prev.tag, prev.type, cur.tag, cur.type);
}
/// <summary>Tells whether a chunk begins at the current token.</summary>
/// <param name="prev">- the label/tag/type of the previous token; may be null when there is no previous token</param>
/// <param name="cur">- the label/tag/type of the current token</param>
/// <returns>
/// the result of the tag/type based IsStartOfChunk overload; when <paramref name="prev"/> is null,
/// "O" is passed as both the previous tag and the previous type
/// </returns>
public static bool IsStartOfChunk(LabeledChunkIdentifier.LabelTagType prev, LabeledChunkIdentifier.LabelTagType cur)
{
    if (prev != null)
    {
        return IsStartOfChunk(prev.tag, prev.type, cur.tag, cur.type);
    }

    // No previous token: substitute "O" for the missing tag and type
    // (presumably the "outside any chunk" marker - confirm against GetTagType).
    return IsStartOfChunk("O", "O", cur.tag, cur.type);
}
/// <summary>Find and annotate chunks.</summary>
/// <remarks>
/// Returns a list of CoreMap (Annotation) objects, each representing a chunk.
/// Each chunk is built by ChunkAnnotationUtils.GetAnnotatedChunk, which is documented to set:
/// CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk
/// CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk
/// TokensAnnotation - List of tokens in this chunk
/// TokenBeginAnnotation - Index of first token in chunk (index in original list of tokens)
/// TokenEndAnnotation - Index of last token in chunk (index in original list of tokens)
/// TextAnnotation - String representing tokens in this chunk (token text separated by space)
/// In addition, this method stores the chunk's type on the chunk under <paramref name="labelKey"/>.
/// </remarks>
/// <param name="tokens">- List of tokens to look for chunks</param>
/// <param name="totalTokensOffset">- Index of tokens to offset by</param>
/// <param name="textKey">- Key to use to find the token text</param>
/// <param name="labelKey">- Key to use to find the token label (to determine if inside chunk or not); also the key under which each chunk's type is stored</param>
/// <param name="tokenChunkKey">- If not null, each token is annotated with the chunk using this key</param>
/// <param name="tokenLabelKey">- If not null, each token is annotated with the text associated with the chunk using this key</param>
/// <param name="checkTokensCompatible">- If not null, additional check to see if this token and the previous are compatible; it is tested with the pair (current token, previous token), where the previous token is null for the first token</param>
/// <returns>List of annotations (each as a CoreMap) representing the chunks of tokens</returns>
/// <exception cref="System.InvalidOperationException">
/// if a new chunk starts while the previous chunk is still open
/// </exception>
public virtual IList<ICoreMap> GetAnnotatedChunks(IList<CoreLabel> tokens, int totalTokensOffset, Type textKey, Type labelKey, Type tokenChunkKey, Type tokenLabelKey, IPredicate<Pair<CoreLabel, CoreLabel>> checkTokensCompatible)
{
    IList<ICoreMap> chunks = new ArrayList();
    LabeledChunkIdentifier.LabelTagType prevTagType = null;
    // Index of the first token of the chunk currently open; -1 when no chunk is open.
    int tokenBegin = -1;
    for (int i = 0; i < tokens.Count; i++)
    {
        CoreLabel token = tokens[i];
        string label = (string)token.Get(labelKey);
        LabeledChunkIdentifier.LabelTagType curTagType = GetTagType(label);

        bool isCompatible = true;
        if (checkTokensCompatible != null)
        {
            // Pair order is (current, previous); there is no previous token at index 0.
            CoreLabel prev = (i > 0) ? tokens[i - 1] : null;
            Pair<CoreLabel, CoreLabel> p = Pair.MakePair(token, prev);
            isCompatible = checkTokensCompatible.Test(p);
        }

        // Close the open chunk when the labels say it ended, or when the caller's
        // compatibility check forces a break between the previous and current token.
        if (IsEndOfChunk(prevTagType, curTagType) || !isCompatible)
        {
            int tokenEnd = i;
            if (tokenBegin >= 0 && tokenEnd > tokenBegin)
            {
                ICoreMap chunk = ChunkAnnotationUtils.GetAnnotatedChunk(tokens, tokenBegin, tokenEnd, totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey);
                // The chunk that just ended carries the type of its last token.
                chunk.Set(labelKey, prevTagType.type);
                chunks.Add(chunk);
                tokenBegin = -1;
            }
        }

        // Open a new chunk when the labels say one starts here, or when an incompatible
        // token is itself part of a chunk (the forced break above split the chunk in two).
        if (IsStartOfChunk(prevTagType, curTagType) || (!isCompatible && IsChunk(curTagType)))
        {
            if (tokenBegin >= 0)
            {
                // Internal invariant violated: the previous chunk should have been closed above.
                throw new InvalidOperationException("New chunk started, prev chunk not ended yet!");
            }
            tokenBegin = i;
        }

        prevTagType = curTagType;
    }

    // A chunk still open after the last token runs to the end of the token list.
    if (tokenBegin >= 0)
    {
        ICoreMap chunk = ChunkAnnotationUtils.GetAnnotatedChunk(tokens, tokenBegin, tokens.Count, totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey);
        chunk.Set(labelKey, prevTagType.type);
        chunks.Add(chunk);
    }
    return chunks;
}
/// <summary>Compares the type of this instance with the type of another instance.</summary>
/// <param name="other">- the label/tag/type to compare against</param>
/// <returns>true if this instance's type equals the type of <paramref name="other"/></returns>
public virtual bool TypeMatches(LabeledChunkIdentifier.LabelTagType other)
{
    // Only the type field takes part in the comparison.
    bool sameType = this.type.Equals(other.type);
    return sameType;
}
/// <summary>Tells whether a label/tag/type belongs to a chunk, judged by its tag alone.</summary>
/// <param name="cur">- the label/tag/type to inspect</param>
/// <returns>false if the tag is "O" or "."; true for any other tag</returns>
private static bool IsChunk(LabeledChunkIdentifier.LabelTagType cur)
{
    if ("O".Equals(cur.tag))
    {
        return false;
    }

    // "." is the only other tag treated as not being part of a chunk.
    return !".".Equals(cur.tag);
}