/// <summary>
/// Chunks every sentence of the specified document: each token receives its chunk tag
/// and each sentence receives a read-only collection of the chunks created for it.
/// </summary>
/// <param name="factory">The text factory used to create the chunk objects.</param>
/// <param name="document">The document whose sentences are chunked.</param>
/// <exception cref="AnalyzerException">
/// Thrown when <c>document.Sentences</c> is null, or when a sentence returns null
/// for its tokens or for its part-of-speech tags.
/// </exception>
protected override void Evaluate(ITextFactory factory, IDocument document) {
    // Sentence detection must have been performed beforehand.
    if (document.Sentences == null) {
        throw new AnalyzerException(this, "The document does not have the sentences detected.");
    }

    foreach (var sentence in document.Sentences) {
        var tokens = sentence.GetTokens();
        var posTags = sentence.GetTags();

        // Both the tokenization and the POS tags are required inputs for the chunker.
        if (tokens == null) {
            throw new AnalyzerException(this, "The document have a sentence without the tokenization.");
        }
        if (posTags == null) {
            throw new AnalyzerException(this, "The document have a sentence without the part-of-speech tags.");
        }

        // The chunker call is serialized by locking on the chunker instance itself.
        string[] chunkTags;
        lock (Chunker) {
            chunkTags = Chunker.Chunk(tokens, posTags);
        }

        // Store the chunk tags on the sentence tokens, position by position.
        // NOTE(review): assumes sentence.Tokens has at least chunkTags.Length entries - confirm.
        var position = 0;
        foreach (var chunkTag in chunkTags) {
            sentence.Tokens[position].ChunkTag = chunkTag;
            position++;
        }

        var phraseSpans = ChunkSample.PhrasesAsSpanList(tokens, posTags, chunkTags);
        var sentenceChunks = new List<IChunk>(phraseSpans.Length);

        for (var index = 0; index < phraseSpans.Length; index++) {
            var created = factory.CreateChunk(sentence, phraseSpans[index]);

            // Spans for which the factory returns null are left out of the result.
            if (created != null) {
                sentenceChunks.Add(created);
            }
        }

        sentence.Chunks = new ReadOnlyCollection<IChunk>(sentenceChunks);
    }
}
/// <summary>
/// Chunks the given sequence and returns the detected phrases as an array of spans.
/// </summary>
/// <param name="tokens">The tokens (words) of the sequence.</param>
/// <param name="tags">The part-of-speech tags of the sequence.</param>
/// <returns>
/// The spans produced by <c>ChunkSample.PhrasesAsSpanList</c> from the tokens, the tags
/// and the chunk tags returned by <c>Chunk</c>.
/// </returns>
public Span[] ChunkAsSpans(string[] tokens, string[] tags) {
    // The chunk tags predicted by Chunk are converted directly into phrase spans.
    return ChunkSample.PhrasesAsSpanList(tokens, tags, Chunk(tokens, tags));
}