Beispiel #1
0
        /// <summary>
        /// Join verb parts like the Dadegan corpus.
        /// Input:
        ///     دیده/ADJ_INO
        ///     شد/V_PA
        /// Output:
        ///     دیده شد/V_PA
        /// </summary>
        /// <param name="sentence">List of TaggedWord objects; not modified by this call.</param>
        /// <returns>New List of TaggedWord with multi-token verbs merged into one entry.</returns>
        public static List <TaggedWord> JoinVerbParts(List <TaggedWord> sentence)
        {
            var result           = new List <TaggedWord>();
            var beforeTaggedWord = new TaggedWord("", "");

            // Walk right-to-left so a verb part can be folded into the token
            // that follows it in reading order. Iterating by descending index
            // (instead of the previous in-place sentence.Reverse()) avoids the
            // side effect of leaving the caller's list permanently reversed.
            for (var i = sentence.Count - 1; i >= 0; i--)
            {
                var taggedWord = sentence[i];

                if (PeykareReader.tokenizer.BeforeVerbs.Contains(taggedWord.word()) ||
                    (PeykareReader.tokenizer.AfterVerbs.Contains(beforeTaggedWord.word()) &&
                     PeykareReader.tokenizer.Verbs.Contains(taggedWord.word())))
                {
                    // Merge this token into the already-seen following part.
                    // beforeTaggedWord may already be stored in result; the
                    // in-place setWord keeps that stored entry up to date.
                    beforeTaggedWord.setWord(taggedWord.word() + " " + beforeTaggedWord.word());
                    if (result.Count == 0)
                    {
                        result.Add(beforeTaggedWord);
                    }
                }
                else
                {
                    result.Add(taggedWord);
                    beforeTaggedWord = taggedWord;
                }
            }

            // result was accumulated in reverse; restore reading order.
            result.Reverse();
            return(result);
        }
Beispiel #2
0
        /// <summary>
        /// Join verb parts like the Dadegan corpus.
        /// Input:
        ///     دیده/ADJ_INO
        ///     شد/V_PA
        /// Output:
        ///     دیده شد/V_PA
        /// </summary>
        /// <param name="sentence">List of TaggedWord objects; not modified by this call.</param>
        /// <returns>New List of TaggedWord with multi-token verbs merged into one entry.</returns>
        public static List<TaggedWord> JoinVerbParts(List<TaggedWord> sentence)
        {
            var result = new List<TaggedWord>();
            var beforeTaggedWord = new TaggedWord("", "");

            // Walk right-to-left so a verb part can be folded into the token
            // that follows it in reading order. Iterating by descending index
            // (instead of the previous in-place sentence.Reverse()) avoids the
            // side effect of leaving the caller's list permanently reversed.
            for (var i = sentence.Count - 1; i >= 0; i--)
            {
                var taggedWord = sentence[i];
                if (PeykareReader.tokenizer.BeforeVerbs.Contains(taggedWord.word()) ||
                    (PeykareReader.tokenizer.AfterVerbs.Contains(beforeTaggedWord.word()) &&
                     PeykareReader.tokenizer.Verbs.Contains(taggedWord.word())))
                {
                    // Merge this token into the already-seen following part.
                    // beforeTaggedWord may already be stored in result; the
                    // in-place setWord keeps that stored entry up to date.
                    beforeTaggedWord.setWord(taggedWord.word() + " " + beforeTaggedWord.word());
                    if (result.Count == 0)
                    {
                        result.Add(beforeTaggedWord);
                    }
                }
                else
                {
                    result.Add(taggedWord);
                    beforeTaggedWord = taggedWord;
                }
            }

            // result was accumulated in reverse; restore reading order.
            result.Reverse();
            return result;
        }
        /// <summary>
        /// Builds this POS sentence by copying each TaggedWord in the given
        /// list into an equivalent PosTaggedWord (word text + POS tag) in Words.
        /// </summary>
        /// <param name="taggedSentence">
        /// Raw (non-generic) list whose elements are TaggedWord instances.
        /// NOTE(review): iterator()/hasNext()/next() is Java-style iteration —
        /// presumably this was machine-ported from Java against a compatibility
        /// shim `List` type; confirm which `List` is in scope here.
        /// </param>
        public PosSentence(List taggedSentence)
        {
            Words = new List <PosTaggedWord>();

            // Java-style explicit iterator over the raw list. A C# foreach
            // would be idiomatic, but the shim type's enumeration support is
            // not visible in this file, so the original form is kept.
            var i = taggedSentence.iterator();

            while (i.hasNext())
            {
                // Elements come out untyped; down-cast each to TaggedWord.
                TaggedWord x = (TaggedWord)i.next();

                // Copy word text and tag into this sentence's Words collection.
                PosTaggedWord word = new PosTaggedWord(x.word(), x.tag());
                Words.Add(word);
            }
        }