Пример #1
0
        /// <summary>
        /// Runs the chunker over every sentence of the specified document, writing the predicted
        /// chunk tag onto each token and the resulting chunk list onto each sentence.
        /// </summary>
        /// <param name="factory">The text factory used to create the chunk objects.</param>
        /// <param name="document">The document whose sentences are chunked.</param>
        /// <exception cref="AnalyzerException">
        /// Thrown when the document has no sentences, or when one of its sentences has no tokens
        /// or no part-of-speech tags.
        /// </exception>
        protected override void Evaluate(ITextFactory factory, IDocument document)
        {
            if (document.Sentences == null)
            {
                throw new AnalyzerException(this, "The document does not have the sentences detected.");
            }

            foreach (var current in document.Sentences)
            {
                // Both inputs are fetched up front; they are validated right after.
                var words = current.GetTokens();
                var posTags = current.GetTags();

                if (words == null)
                {
                    throw new AnalyzerException(this, "The document have a sentence without the tokenization.");
                }

                if (posTags == null)
                {
                    throw new AnalyzerException(this, "The document have a sentence without the part-of-speech tags.");
                }

                // The chunker call is serialized on the chunker instance itself
                // (presumably because it is not safe for concurrent use - confirm).
                string[] chunkTags;
                lock (Chunker)
                {
                    chunkTags = Chunker.Chunk(words, posTags);
                }

                // Copy each predicted tag onto the token at the same position.
                var index = 0;
                while (index < chunkTags.Length)
                {
                    current.Tokens[index].ChunkTag = chunkTags[index];
                    index++;
                }

                var phrases = ChunkSample.PhrasesAsSpanList(words, posTags, chunkTags);
                var created = new List<IChunk>(phrases.Length);

                foreach (var phrase in phrases)
                {
                    var item = factory.CreateChunk(current, phrase);
                    if (item == null)
                    {
                        // The factory produced nothing for this span; leave it out.
                        continue;
                    }

                    created.Add(item);
                }

                current.Chunks = new ReadOnlyCollection<IChunk>(created);
            }
        }
Пример #2
0
        /// <summary>
        /// Chunks the given sequence and returns the detected phrases as an array of spans.
        /// </summary>
        /// <param name="tokens">The tokens (words) of the sequence.</param>
        /// <param name="tags">The part-of-speech tags of the sequence.</param>
        /// <returns>
        /// The spans produced by <c>ChunkSample.PhrasesAsSpanList</c> from the tokens, the tags
        /// and the chunk predictions returned by <c>Chunk</c>.
        /// </returns>
        public Span[] ChunkAsSpans(string[] tokens, string[] tags)
        {
            // The predictions are computed inline and handed straight to the span builder.
            return ChunkSample.PhrasesAsSpanList(tokens, tags, Chunk(tokens, tags));
        }