コード例 #1
0
        /// <summary>
        /// Splits the request's document text into sentences, repairs each one, and
        /// collects the sentences that <c>ProcessSentence</c> manages to produce.
        /// </summary>
        /// <param name="request">Parse request whose <c>Document.Text</c> is processed.</param>
        /// <returns>
        /// A document carrying the original text and the processed sentences in input
        /// order. Sentences for which <c>ProcessSentence</c> returns null are omitted.
        /// </returns>
        protected override LightDocument ActualProcess(ParseRequest request)
        {
            // NOT Thread Safe
            var rawSentences = sentenceSplitter.Split(request.Document.Text).ToArray();

            // Sized for the case where every sentence survives; trimmed below otherwise.
            var processed = new LightSentence[rawSentences.Length];
            int kept = 0;

            for (int position = 0; position < rawSentences.Length; position++)
            {
                var original = rawSentences[position];
                var repaired = repairHandler.Repair(original);
                if (original != repaired)
                {
                    log.LogTrace("Sentence repaired!");
                }

                var sentence = ProcessSentence(repaired);
                if (sentence != null)
                {
                    processed[kept++] = sentence;
                }
            }

            // Drop the unused tail left behind by skipped sentences.
            if (kept < processed.Length)
            {
                Array.Resize(ref processed, kept);
            }

            return new LightDocument
            {
                Text      = request.Document.Text,
                Sentences = processed
            };
        }
コード例 #2
0
        /// <summary>
        /// Splits the request's document text into sentences, then tokenizes, tags and
        /// chunks each one to build a <see cref="LightDocument"/>.
        /// </summary>
        /// <remarks>
        /// Sentences whose repaired text is null/blank, or which yield no tokens, are
        /// skipped before the result array is sized, so the returned
        /// <c>Sentences</c> array never contains null entries.
        /// </remarks>
        /// <param name="request">Parse request whose <c>Document.Text</c> is processed.</param>
        /// <returns>
        /// A document carrying the original text and one sentence per retained input
        /// sentence. Each word holds its token text and tag, plus the type of the chunk
        /// covering its token index (if any) as its phrase.
        /// </returns>
        protected override LightDocument ActualProcess(ParseRequest request)
        {
            var sentences        = sentenceSplitter.Split(request.Document.Text).ToArray();
            var sentenceDataList = new List<SentenceData>(sentences.Length);

            foreach (var sentence in sentences)
            {
                var text = repairHandler.Repair(sentence);
                if (sentence != text)
                {
                    log.LogDebug("Sentence repaired!");
                }

                // Reject blank text here rather than while filling the result array:
                // skipping at that later stage would leave a null slot in Sentences.
                if (string.IsNullOrWhiteSpace(text))
                {
                    continue;
                }

                var sentenceData = new SentenceData {
                    Text = text
                };
                sentenceData.Tokens = tokenizer.Tokenize(sentenceData.Text);
                if (sentenceData.Tokens.Length <= 0)
                {
                    continue;
                }

                sentenceData.Tags   = posTagger.Tag(sentenceData.Tokens);
                sentenceData.Chunks = chunker.ChunkAsSpans(sentenceData.Tokens, sentenceData.Tags).ToArray();
                sentenceDataList.Add(sentenceData);
            }

            var document = new LightDocument
            {
                Text      = request.Document.Text,
                Sentences = new LightSentence[sentenceDataList.Count]
            };

            for (var index = 0; index < sentenceDataList.Count; index++)
            {
                SentenceData sentenceData = sentenceDataList[index];

                // Map every token index in [Start, End) to its chunk; when chunks
                // overlap, the one listed last wins.
                var chunks = new Dictionary<int, Span>();
                foreach (Span chunk in sentenceData.Chunks)
                {
                    for (var i = chunk.Start; i < chunk.End; i++)
                    {
                        chunks[i] = chunk;
                    }
                }

                var currentSentence = new LightSentence
                {
                    Text  = sentenceData.Text,
                    Words = new LightWord[sentenceData.Tokens.Length]
                };

                for (var i = 0; i < sentenceData.Tokens.Length; i++)
                {
                    var wordData = new LightWord
                    {
                        Tag  = sentenceData.Tags[i],
                        Text = sentenceData.Tokens[i]
                    };

                    // Tokens not covered by any chunk keep the default Phrase.
                    if (chunks.TryGetValue(i, out Span chunk))
                    {
                        wordData.Phrase = chunk.Type;
                    }

                    currentSentence.Words[i] = wordData;
                }

                document.Sentences[index] = currentSentence;
            }

            return document;
        }