/// <summary>
/// Tokenizes and tags one sentence, wraps the result in a <see cref="LightSentence"/>,
/// and then hands it to <c>NERExtraction</c> and <c>PhraseExtraction</c>.
/// </summary>
/// <param name="text">Sentence text to process.</param>
/// <returns>
/// The populated sentence, or <c>null</c> when the tokenizer produces no tokens.
/// </returns>
private LightSentence ProcessSentence(string text)
        {
            var tokens = tokenizer.Tokenize(text);
            if (tokens.Length <= 0)
            {
                // Nothing to tag; callers receive null for token-less input.
                return null;
            }

            var tags  = posTagger.Tag(tokens);
            var words = new LightWord[tokens.Length];

            var sentence = new LightSentence
            {
                Text  = text,
                Words = words
            };

            // One word per token, pairing each token with the tag at the same index.
            for (var index = 0; index < tokens.Length; index++)
            {
                words[index] = new LightWord
                {
                    Tag  = tags[index],
                    Text = tokens[index]
                };
            }

            NERExtraction(sentence, tokens);
            PhraseExtraction(sentence, tokens, tags);
            return sentence;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Trains a POS model via <c>TrainPosModel</c> and checks the tags produced for a
        /// fixed six-token sentence.
        /// </summary>
        public void TestPosTagger()
        {
            var sentence     = new[] { "The", "driver", "got", "badly", "injured", "." };
            var expectedTags = new[] { "DT", "NN", "VBD", "RB", "VBN", "." };

            var tagger = new POSTaggerME(TrainPosModel());
            var tags   = tagger.Tag(sentence);

            // Length first, so a short result fails here rather than on an index.
            Assert.AreEqual(expectedTags.Length, tags.Length);

            for (var position = 0; position < expectedTags.Length; position++)
            {
                Assert.AreEqual(expectedTags[position], tags[position]);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Tags a single word and returns the key of the first entry in
        /// <c>partsOfSpeechAndTags</c> whose value contains that tag.
        /// </summary>
        /// <param name="word">The word to tag.</param>
        /// <returns>
        /// The key of the first matching entry; when nothing matches, the key of whatever
        /// <c>FirstOrDefault</c> yields for the collection's element type.
        /// </returns>
        public PartsOfSpeech Recognize(string word)
        {
            var tags = PosTagger.Tag(new[] { word });

            // Only the tag of the single supplied word is relevant.
            var match = partsOfSpeechAndTags.FirstOrDefault(entry => entry.Value.Contains(tags[0]));

            return match.Key;
        }
Exemplo n.º 4
0
 /// <summary>
 /// Tags a single word and returns the first element of the resulting tag as a string.
 /// </summary>
 /// <param name="word">The word to tag.</param>
 /// <returns>
 /// The leading element of the word's tag, converted with <c>ToString()</c>.
 /// NOTE(review): presumably tags are strings, making this the tag's first character; confirm.
 /// </returns>
 public string GetPartOfSpeech(string word)
 {
     var tags     = posTagger.Tag(new[] { word });
     var firstTag = tags[0];
     var leading  = firstTag[0];

     return leading.ToString();
 }
        /// <summary>
        /// Splits the request's document text into sentences, runs each one through repair,
        /// tokenization, POS tagging and chunking, and assembles the results into a
        /// <see cref="LightDocument"/>.
        /// </summary>
        /// <param name="request">Parse request; its <c>Document.Text</c> is the input.</param>
        /// <returns>
        /// A document carrying the original text and one <see cref="LightSentence"/> for every
        /// sentence that produced at least one token and whose repaired text is not blank.
        /// The <c>Sentences</c> array contains no null entries.
        /// </returns>
        protected override LightDocument ActualProcess(ParseRequest request)
        {
            var sentences        = sentenceSplitter.Split(request.Document.Text).ToArray();
            var sentenceDataList = new List<SentenceData>(sentences.Length);

            // Pass 1: repair, tokenize, tag and chunk every sentence that yields tokens.
            foreach (var sentence in sentences)
            {
                var text = repairHandler.Repair(sentence);
                if (sentence != text)
                {
                    log.LogDebug("Sentence repaired!");
                }

                var sentenceData = new SentenceData
                {
                    Text = text
                };
                sentenceData.Tokens = tokenizer.Tokenize(sentenceData.Text);
                if (sentenceData.Tokens.Length <= 0)
                {
                    continue;
                }

                sentenceData.Tags   = posTagger.Tag(sentenceData.Tokens);
                sentenceData.Chunks = chunker.ChunkAsSpans(sentenceData.Tokens, sentenceData.Tags).ToArray();
                sentenceDataList.Add(sentenceData);
            }

            // Pass 2: build the output sentences. They are collected in a list instead of being
            // written into a pre-sized array, so a skipped sentence does not leave a null slot
            // in the resulting Sentences array.
            var builtSentences = new List<LightSentence>(sentenceDataList.Count);
            foreach (SentenceData data in sentenceDataList)
            {
                if (string.IsNullOrWhiteSpace(data.Text))
                {
                    continue;
                }

                // Map each token index in [Start, End) to the chunk covering it; if chunks
                // overlap, the later chunk wins.
                var chunkByToken = new Dictionary<int, Span>();
                foreach (Span chunk in data.Chunks)
                {
                    for (var i = chunk.Start; i < chunk.End; i++)
                    {
                        chunkByToken[i] = chunk;
                    }
                }

                var words = new LightWord[data.Tokens.Length];
                for (var i = 0; i < words.Length; i++)
                {
                    var word = new LightWord
                    {
                        Tag  = data.Tags[i],
                        Text = data.Tokens[i]
                    };

                    // Tokens outside every chunk keep their default Phrase.
                    if (chunkByToken.TryGetValue(i, out Span owner))
                    {
                        word.Phrase = owner.Type;
                    }

                    words[i] = word;
                }

                builtSentences.Add(new LightSentence
                {
                    Text  = data.Text,
                    Words = words
                });
            }

            return new LightDocument
            {
                Text      = request.Document.Text,
                Sentences = builtSentences.ToArray()
            };
        }