/// <summary>
/// Joins verb parts in the style of the Dadegan corpus.
/// Input:
///     دیده/ADJ_INO
///     شد/V_PA
/// Output:
///     دیده شد/V_PA
/// </summary>
/// <remarks>
/// The sentence is scanned from its last word to its first. A word is merged into
/// the word that follows it when it is listed in <c>BeforeVerbs</c>, or when it is
/// listed in <c>Verbs</c> and the following word is listed in <c>AfterVerbs</c>.
/// The merged entry keeps the tag of the following word.
/// The order of <paramref name="sentence"/> is left untouched, but the
/// <c>TaggedWord</c> instances it holds are reused in the result and have their
/// word text updated in place when something is merged into them.
/// </remarks>
/// <param name="sentence">List of TaggedWord objects, in sentence order.</param>
/// <returns>A new list of TaggedWord, in sentence order, with verb parts joined.</returns>
public static List<TaggedWord> JoinVerbParts(List<TaggedWord> sentence)
{
    var result = new List<TaggedWord>();

    // Placeholder standing in for "the word after the last word" of the sentence.
    var beforeTaggedWord = new TaggedWord("", "");

    // Walk backwards by index instead of calling sentence.Reverse(), which would
    // reverse the caller's list in place as a side effect.
    for (var i = sentence.Count - 1; i >= 0; i--)
    {
        var taggedWord = sentence[i];

        if (PeykareReader.tokenizer.BeforeVerbs.Contains(taggedWord.word()) ||
            (PeykareReader.tokenizer.AfterVerbs.Contains(beforeTaggedWord.word()) &&
             PeykareReader.tokenizer.Verbs.Contains(taggedWord.word())))
        {
            // Prepend the current word to the word that follows it in the sentence.
            beforeTaggedWord.setWord(taggedWord.word() + " " + beforeTaggedWord.word());

            // Nothing has been emitted yet, so the merge target is still the
            // placeholder; emit it so the merged text is not lost.
            if (result.Count == 0)
            {
                result.Add(beforeTaggedWord);
            }
        }
        else
        {
            result.Add(taggedWord);
            beforeTaggedWord = taggedWord;
        }
    }

    // Entries were collected last-to-first; restore sentence order.
    result.Reverse();
    return result;
}
/// <summary>
/// Joins verb parts in the style of the Dadegan corpus.
/// Input:
///     دیده/ADJ_INO
///     شد/V_PA
/// Output:
///     دیده شد/V_PA
/// </summary>
/// <remarks>
/// Words are visited from the end of the sentence towards the start. The current
/// word is glued in front of the word after it when the current word is in
/// <c>BeforeVerbs</c>, or when the word after it is in <c>AfterVerbs</c> and the
/// current word is in <c>Verbs</c>; the glued entry keeps the later word's tag.
/// <paramref name="sentence"/> itself is not reordered. The <c>TaggedWord</c>
/// objects inside it are shared with the returned list, and their word text is
/// modified in place whenever another word is glued onto them.
/// </remarks>
/// <param name="sentence">List of TaggedWord objects, in sentence order.</param>
/// <returns>A new list of TaggedWord, in sentence order, with verb parts joined.</returns>
public static List<TaggedWord> JoinVerbParts(List<TaggedWord> sentence)
{
    var joined = new List<TaggedWord>();

    // Stand-in for the (non-existent) word following the final word.
    var following = new TaggedWord("", "");

    // Index-based reverse traversal: calling sentence.Reverse() here would leave
    // the caller's list reversed after the method returns.
    for (var index = sentence.Count - 1; index >= 0; index--)
    {
        var current = sentence[index];

        if (PeykareReader.tokenizer.BeforeVerbs.Contains(current.word()) ||
            (PeykareReader.tokenizer.AfterVerbs.Contains(following.word()) &&
             PeykareReader.tokenizer.Verbs.Contains(current.word())))
        {
            following.setWord(current.word() + " " + following.word());

            // Only the stand-in can be the merge target while the output is empty;
            // add it so the joined text still appears in the result.
            if (joined.Count == 0)
            {
                joined.Add(following);
            }
        }
        else
        {
            joined.Add(current);
            following = current;
        }
    }

    // Collected back-to-front; flip into sentence order before returning.
    joined.Reverse();
    return joined;
}
/// <summary>
/// Creates a sentence whose <c>Words</c> list holds one <c>PosTaggedWord</c> per
/// element of <paramref name="taggedSentence"/>, in iteration order.
/// </summary>
/// <param name="taggedSentence">
/// Collection traversed through its <c>iterator()</c>; every element is cast to
/// <c>TaggedWord</c>, and its word and tag are copied into the new entry.
/// </param>
public PosSentence(List taggedSentence)
{
    Words = new List<PosTaggedWord>();

    for (var cursor = taggedSentence.iterator(); cursor.hasNext();)
    {
        var source = (TaggedWord)cursor.next();
        Words.Add(new PosTaggedWord(source.word(), source.tag()));
    }
}