Ejemplo n.º 1
0
        /// <summary>
        /// Simplifies a POS tagging result: every tag is mapped through
        /// <c>TagMapper.getKaistTagOnLevel</c> at <c>TAG_LEVEL</c> (documented as reducing the
        /// 69 KAIST tags to 9 tags), and neighbouring morphemes that end up with the same
        /// simplified tag are concatenated into a single morpheme.
        /// </summary>
        /// <param name="st">the POS tagging result to simplify; the arrays obtained from it are
        /// overwritten while processing</param>
        /// <returns>the same <paramref name="st"/> instance, carrying the simplified eojeols</returns>
        public virtual Sentence doProcess(Sentence st)
        {
            Eojeol[] eojeols = st.Eojeols;

            for (int index = 0; index < eojeols.Length; index++)
            {
                string[] tags = eojeols[index].Tags;

                // Pass 1: overwrite each tag with its simplified form and note whether
                // two neighbouring tags became identical.
                bool hasAdjacentRepeat = false;
                string previous = "";
                for (int t = 0; t < tags.Length; t++)
                {
                    string mapped = TagMapper.getKaistTagOnLevel(tags[t], TAG_LEVEL);
                    tags[t] = mapped;
                    hasAdjacentRepeat |= mapped.Equals(previous);
                    previous = mapped;
                }

                if (!hasAdjacentRepeat)
                {
                    // Nothing to merge; the eojeol is kept as it is.
                    continue;
                }

                // Pass 2: collapse each run of equal tags into one tag/morpheme pair.
                tagList.Clear();
                morphemeList.Clear();
                string[] morphemes = eojeols[index].Morphemes;

                for (int next = 1; next < tags.Length; next++)
                {
                    int current = next - 1;

                    if (!tags[current].Equals(tags[next]))
                    {
                        // A run ends at 'current': emit its tag and accumulated text.
                        tagList.Add(tags[current]);
                        morphemeList.Add(morphemes[current]);
                        continue;
                    }

                    // Same tag as the successor: carry the accumulated text forward.
                    morphemes[next] = morphemes[current] + morphemes[next];
                }

                // The last position always closes the final run.
                tagList.Add(tags[tags.Length - 1]);
                morphemeList.Add(morphemes[morphemes.Length - 1]);

                eojeols[index] = new Eojeol(morphemeList.ToArray(), tagList.ToArray());
            }

            st.Eojeols = eojeols;
            return st;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Simplifies a morphological analysis result: every tag is mapped through
        /// <c>TagMapper.getKaistTagOnLevel</c> at <c>TAG_LEVEL</c> (documented as reducing the
        /// 69 KAIST tags to 22 tags), neighbouring morphemes that end up with the same
        /// simplified tag are concatenated, and eojeols whose string form has already been
        /// seen within the same eojeol array are dropped.
        /// </summary>
        /// <param name="sos">the morphological analysis result, where each eojeol may have more
        /// than one analysis result; the arrays obtained from it are overwritten while processing</param>
        /// <returns>the same <paramref name="sos"/> instance, carrying the simplified eojeol arrays</returns>
        public virtual SetOfSentences doProcess(SetOfSentences sos)
        {
            List<Eojeol[]> sourceSets = sos.getEojeolSetArray();
            List<Eojeol[]> simplifiedSets = new List<Eojeol[]>();

            foreach (Eojeol[] candidates in sourceSets)
            {
                // The duplicate filter is scoped to a single eojeol array.
                dupFilterMap.Clear();

                for (int index = 0; index < candidates.Length; index++)
                {
                    string[] tags = candidates[index].Tags;

                    // Pass 1: overwrite each tag with its simplified form and note whether
                    // two neighbouring tags became identical.
                    bool hasAdjacentRepeat = false;
                    string previous = "";
                    for (int t = 0; t < tags.Length; t++)
                    {
                        string mapped = TagMapper.getKaistTagOnLevel(tags[t], TAG_LEVEL);
                        tags[t] = mapped;
                        hasAdjacentRepeat |= mapped.Equals(previous);
                        previous = mapped;
                    }

                    if (hasAdjacentRepeat)
                    {
                        // Pass 2: collapse each run of equal tags into one tag/morpheme pair.
                        tagList.Clear();
                        morphemeList.Clear();
                        string[] morphemes = candidates[index].Morphemes;

                        for (int next = 1; next < tags.Length; next++)
                        {
                            int current = next - 1;

                            if (!tags[current].Equals(tags[next]))
                            {
                                // A run ends at 'current': emit its tag and accumulated text.
                                tagList.Add(tags[current]);
                                morphemeList.Add(morphemes[current]);
                                continue;
                            }

                            // Same tag as the successor: carry the accumulated text forward.
                            morphemes[next] = morphemes[current] + morphemes[next];
                        }

                        // The last position always closes the final run.
                        tagList.Add(tags[tags.Length - 1]);
                        morphemeList.Add(morphemes[morphemes.Length - 1]);

                        candidates[index] = new Eojeol(morphemeList.ToArray(), tagList.ToArray());
                    }

                    // Only the first eojeol seen for a given string form is remembered.
                    string key = candidates[index].ToString();
                    if (dupFilterMap.ContainsKey(key))
                    {
                        continue;
                    }
                    dupFilterMap[key] = candidates[index];
                }

                if (candidates.Length == dupFilterMap.Count)
                {
                    // No duplicates were found: reuse the processed array itself.
                    simplifiedSets.Add(candidates);
                }
                else
                {
                    // NOTE(review): the order of the surviving eojeols follows dupFilterMap.Values;
                    // confirm that the map type yields the order callers expect.
                    simplifiedSets.Add(dupFilterMap.Values.ToArray());
                }
            }

            sos.setEojeolSetArray(simplifiedSets);
            return sos;
        }