/// <summary>
/// Tokenizes and POS-tags one sentence, builds the word list, and runs
/// NER and phrase extraction over the result.
/// </summary>
/// <param name="text">Raw sentence text.</param>
/// <returns>The populated sentence, or null when tokenization yields no tokens.</returns>
private LightSentence ProcessSentence(string text)
{
    var tokens = tokenizer.Tokenize(text);
    if (tokens.Length <= 0)
    {
        return null;
    }

    var tags = posTagger.Tag(tokens);
    var sentence = new LightSentence
    {
        Text = text,
        Words = new LightWord[tokens.Length]
    };

    for (var index = 0; index < tokens.Length; index++)
    {
        sentence.Words[index] = new LightWord
        {
            Tag = tags[index],
            Text = tokens[index]
        };
    }

    NERExtraction(sentence, tokens);
    PhraseExtraction(sentence, tokens, tags);
    return sentence;
}
/// <summary>
/// Verifies that a freshly trained POS model tags a sample sentence
/// with the expected Penn Treebank tags.
/// </summary>
public void TestPosTagger()
{
    // Arrange
    var posModel = TrainPosModel();
    var tagger = new POSTaggerME(posModel);
    var sentence = new[] { "The", "driver", "got", "badly", "injured", "." };

    // Act
    var tags = tagger.Tag(sentence);

    // Assert
    var expected = new[] { "DT", "NN", "VBD", "RB", "VBN", "." };
    Assert.AreEqual(expected.Length, tags.Length);
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], tags[i]);
    }
}
/// <summary>
/// Maps a single word to a part of speech by tagging it and looking the
/// resulting tag up in the tag-set table.
/// </summary>
/// <param name="word">Word to classify.</param>
/// <returns>
/// The matching <see cref="PartsOfSpeech"/>; when no table entry contains the tag,
/// FirstOrDefault yields the default KeyValuePair, so the enum's default value is returned.
/// </returns>
public PartsOfSpeech Recognize(string word)
{
    var tags = PosTagger.Tag(new[] { word });
    var match = partsOfSpeechAndTags.FirstOrDefault(pair => pair.Value.Contains(tags[0]));
    return match.Key;
}
/// <summary>
/// Tags a single word and returns the first character of its POS tag as a
/// one-character string (e.g. "N" for tag "NN").
/// NOTE(review): the truncation to one character looks like a deliberate
/// coarse-grained POS mapping — confirm against callers.
/// </summary>
/// <param name="word">Word to tag.</param>
public string GetPartOfSpeech(string word)
{
    var tags = posTagger.Tag(new[] { word });
    var firstTag = tags[0];
    return firstTag[0].ToString();
}
/// <summary>
/// Splits the request document into sentences, repairs, tokenizes, POS-tags and
/// chunks each one, then assembles the results into a <see cref="LightDocument"/>.
/// </summary>
/// <param name="request">Parse request carrying the raw document text.</param>
/// <returns>
/// A document whose <c>Sentences</c> array contains exactly one non-null entry per
/// usable (non-empty, tokenizable) sentence.
/// </returns>
protected override LightDocument ActualProcess(ParseRequest request)
{
    var sentences = sentenceSplitter.Split(request.Document.Text).ToArray();
    var sentenceDataList = new List<SentenceData>(sentences.Length);

    foreach (var sentence in sentences)
    {
        var text = repairHandler.Repair(sentence);
        if (sentence != text)
        {
            log.LogDebug("Sentence repaired!");
        }

        // Filter unusable sentences up front so every collected entry
        // produces a LightSentence below. (The original deferred the
        // whitespace check to the fill loop, which left null holes —
        // see the fix note before the array conversion.)
        if (string.IsNullOrWhiteSpace(text))
        {
            continue;
        }

        var sentenceData = new SentenceData { Text = text };
        sentenceData.Tokens = tokenizer.Tokenize(sentenceData.Text);
        if (sentenceData.Tokens.Length <= 0)
        {
            continue;
        }

        sentenceData.Tags = posTagger.Tag(sentenceData.Tokens);
        sentenceData.Chunks = chunker.ChunkAsSpans(sentenceData.Tokens, sentenceData.Tags).ToArray();
        sentenceDataList.Add(sentenceData);
    }

    var lightSentences = new List<LightSentence>(sentenceDataList.Count);
    foreach (var sentenceData in sentenceDataList)
    {
        var currentSentence = new LightSentence { Text = sentenceData.Text };

        // Map token index -> covering chunk span so each word can be
        // labeled with its phrase type (Span.End is treated as exclusive,
        // matching the original loop bounds).
        var chunks = new Dictionary<int, Span>();
        foreach (Span chunk in sentenceData.Chunks)
        {
            for (var i = chunk.Start; i < chunk.End; i++)
            {
                chunks[i] = chunk;
            }
        }

        currentSentence.Words = new LightWord[sentenceData.Tokens.Length];
        for (var i = 0; i < sentenceData.Tokens.Length; i++)
        {
            var wordData = new LightWord
            {
                Tag = sentenceData.Tags[i],
                Text = sentenceData.Tokens[i]
            };
            currentSentence.Words[i] = wordData;

            if (chunks.TryGetValue(i, out Span chunk))
            {
                wordData.Phrase = chunk.Type;
            }
        }

        lightSentences.Add(currentSentence);
    }

    // BUG FIX: the original allocated Sentences with sentenceDataList.Count slots
    // but skipped whitespace-only entries inside the fill loop, leaving null
    // elements in the array that would NRE any consumer iterating sentences.
    // Collecting into a list and converting guarantees a dense, null-free array.
    var document = new LightDocument
    {
        Text = request.Document.Text,
        Sentences = lightSentences.ToArray()
    };
    return document;
}