/// <summary>
/// Joins the separate parts of a multi-word verb into a single tagged word.
/// The sentence is scanned from its last word to its first. A word is treated
/// as a verb part when it is listed in <c>tokenizer.BeforeVerbs</c>, or when it
/// is listed in <c>tokenizer.Verbs</c> and the text accumulated for the word
/// that follows it is listed in <c>tokenizer.AfterVerbs</c>. A verb part is
/// prepended, separated by one space, to the word that follows it, and the
/// merged word keeps the tag of that following word.
/// </summary>
/// <param name="sentence">
/// Tagged words of one sentence. The list itself is not reordered, but the
/// text of the TaggedWord objects it holds is modified when parts are merged.
/// </param>
/// <returns>A new list with the joined words, in the original sentence order.</returns>
private List<TaggedWord> JoinVerbParts(List<TaggedWord> sentence)
{
    var joined = new List<TaggedWord>();

    // Stands in for "the word after the current one" until a real word is kept.
    var following = new TaggedWord("", "");

    // Walk backwards by index so the caller's list is not reversed in place.
    for (int i = sentence.Count - 1; i >= 0; i--)
    {
        TaggedWord current = sentence[i];
        var text = current.word();

        bool isVerbPart = this.tokenizer.BeforeVerbs.Contains(text)
            || (this.tokenizer.AfterVerbs.Contains(following.word())
                && this.tokenizer.Verbs.Contains(text));

        if (isVerbPart)
        {
            // Prepend this part to the FOLLOWING word's text (the original code
            // concatenated the current word with itself and dropped the verb).
            following.setWord(text + " " + following.word());

            // When the sentence ends with a verb part, the placeholder has not
            // been emitted yet, so emit it once.
            if (joined.Count == 0)
            {
                joined.Add(following);
            }
        }
        else
        {
            joined.Add(current);
            following = current;
        }
    }

    // Words were collected last-to-first; restore sentence order.
    joined.Reverse();
    return joined;
}
/// <summary>
/// Join verb parts like the Dadegan corpus.
/// Input:
///     دیده/ADJ_INO
///     شد/V_PA
/// Output:
///     دیده شد/V_PA
/// </summary>
/// <remarks>
/// The sentence is scanned from its last word to its first. A word is treated
/// as a verb part when it is listed in <c>PeykareReader.tokenizer.BeforeVerbs</c>,
/// or when it is listed in <c>PeykareReader.tokenizer.Verbs</c> and the text
/// accumulated for the word that follows it is listed in
/// <c>PeykareReader.tokenizer.AfterVerbs</c>. A verb part is prepended,
/// separated by one space, to the word that follows it; the merged word keeps
/// the tag of that following word.
/// </remarks>
/// <param name="sentence">
/// List of TaggedWord objects. The list itself is not reordered, but the text
/// of the TaggedWord objects it holds is modified when parts are merged.
/// </param>
/// <returns>A new list of TaggedWord in the original sentence order.</returns>
public static List<TaggedWord> JoinVerbParts(List<TaggedWord> sentence)
{
    var joined = new List<TaggedWord>();

    // Stands in for "the word after the current one" until a real word is kept.
    var following = new TaggedWord("", "");

    // Walk backwards by index so the caller's list is not reversed in place.
    for (int i = sentence.Count - 1; i >= 0; i--)
    {
        TaggedWord current = sentence[i];
        var text = current.word();

        bool isVerbPart = PeykareReader.tokenizer.BeforeVerbs.Contains(text)
            || (PeykareReader.tokenizer.AfterVerbs.Contains(following.word())
                && PeykareReader.tokenizer.Verbs.Contains(text));

        if (isVerbPart)
        {
            // Prepend this part to the text of the word that follows it.
            following.setWord(text + " " + following.word());

            // When the sentence ends with a verb part, the placeholder has not
            // been emitted yet, so emit it once.
            if (joined.Count == 0)
            {
                joined.Add(following);
            }
        }
        else
        {
            joined.Add(current);
            following = current;
        }
    }

    // Words were collected last-to-first; restore sentence order.
    joined.Reverse();
    return joined;
}
/// <summary>
/// Tags an already tokenized sentence with the underlying tagger.
/// Spaces inside each token are replaced with underscores before the tokens
/// are handed to the tagger; afterwards each tagged word gets the original
/// token text (spaces included) written back.
/// </summary>
/// <param name="sentence">Tokens of one sentence, in order.</param>
/// <returns>One TaggedWord per entry returned by the tagger, carrying the original token text.</returns>
public List<TaggedWord> BatchTag(List<string> sentence)
{
    int tokenCount = sentence.Count;

    // Build the underscore-joined forms that are passed to the tagger.
    var underscored = new string[tokenCount];
    for (int idx = 0; idx < tokenCount; idx++)
    {
        underscored[idx] = sentence[idx].Replace(" ", "_");
    }

    List wordList = Sentence.toWordList(underscored);
    ArrayList tagged = this._tagger.tagSentence(wordList);

    var output = new List<TaggedWord>();
    int taggedCount = tagged.size();
    for (int idx = 0; idx < taggedCount; idx++)
    {
        var word = (TaggedWord)tagged.get(idx);

        // Put the original token text back in place of the underscored form.
        word.setWord(sentence[idx]);
        output.Add(word);
    }

    return output;
}