/// <summary>
/// Simplifies the tags of a POS tagging result. Every tag of every eojeol is replaced by
/// <c>TagMapper.getKaistTagOnLevel(tag, TAG_LEVEL)</c>, and adjacent morphemes that end up
/// with the same simplified tag are merged into one morpheme.
/// The original documentation describes this as reducing the 69 KAIST tags to 9 tags; the
/// actual granularity is decided by <c>TAG_LEVEL</c>, which is defined outside this method.
/// </summary>
/// <param name="st">- the sentence whose eojeols are simplified. The tag arrays returned by
/// <c>Eojeol.Tags</c> are overwritten in place and the eojeol array is assigned back to
/// <c>st.Eojeols</c>.
/// </param>
/// <returns> the same <paramref name="st"/> instance, carrying the simplified eojeols
/// </returns>
public virtual Sentence doProcess(Sentence st)
{
    Eojeol[] eojeols = st.Eojeols;

    for (int e = 0; e < eojeols.Length; e++)
    {
        string[] tags = eojeols[e].Tags;
        string previous = "";
        bool hasAdjacentDuplicate = false;

        // Map every tag to its simplified form (written back into the same array) and
        // remember whether any tag equals the one right before it.
        for (int t = 0; t < tags.Length; t++)
        {
            tags[t] = TagMapper.getKaistTagOnLevel(tags[t], TAG_LEVEL);
            hasAdjacentDuplicate |= tags[t].Equals(previous);
            previous = tags[t];
        }

        if (!hasAdjacentDuplicate)
        {
            // Nothing to merge; the eojeol keeps its (already rewritten) tag array.
            continue;
        }

        // tagList / morphemeList are reusable scratch buffers declared outside this method.
        tagList.Clear();
        morphemeList.Clear();
        string[] morphemes = eojeols[e].Morphemes;

        // Walk adjacent pairs: equal tags push the left morpheme's text into the right one,
        // otherwise the left element is final and gets emitted.
        for (int next = 1; next < tags.Length; next++)
        {
            int current = next - 1;
            if (tags[current].Equals(tags[next]))
            {
                morphemes[next] = morphemes[current] + morphemes[next];
            }
            else
            {
                tagList.Add(tags[current]);
                morphemeList.Add(morphemes[current]);
            }
        }

        // The last element is never emitted by the pair loop, so add it explicitly.
        tagList.Add(tags[tags.Length - 1]);
        morphemeList.Add(morphemes[morphemes.Length - 1]);

        eojeols[e] = new Eojeol(morphemeList.ToArray(), tagList.ToArray());
    }

    st.Eojeols = eojeols;
    return st;
}
/// <summary>
/// Simplifies the tags of a morphological analysis result. For every candidate eojeol, each tag
/// is replaced by <c>TagMapper.getKaistTagOnLevel(tag, TAG_LEVEL)</c>, adjacent morphemes that
/// end up with the same simplified tag are merged, and candidates whose <c>ToString()</c>
/// becomes identical after simplification are collapsed to the first occurrence.
/// The original documentation describes this as reducing the 69 KAIST tags to 22 tags; the
/// actual granularity is decided by <c>TAG_LEVEL</c>, which is defined outside this method.
/// </summary>
/// <param name="sos">- the analysis result; its eojeol set list is read through
/// <c>getEojeolSetArray()</c> and replaced through <c>setEojeolSetArray()</c>. The tag arrays
/// returned by <c>Eojeol.Tags</c> are overwritten in place.
/// </param>
/// <returns> the same <paramref name="sos"/> instance, carrying the simplified eojeol sets
/// </returns>
public virtual SetOfSentences doProcess(SetOfSentences sos)
{
    List<Eojeol[]> simplifiedSets = new List<Eojeol[]>();

    foreach (Eojeol[] candidates in sos.getEojeolSetArray())
    {
        // dupFilterMap is a reusable map declared outside this method; reset it per eojeol set.
        dupFilterMap.Clear();

        for (int c = 0; c < candidates.Length; c++)
        {
            string[] tags = candidates[c].Tags;
            string previous = "";
            bool hasAdjacentDuplicate = false;

            // Map every tag to its simplified form (written back into the same array) and
            // remember whether any tag equals the one right before it.
            for (int t = 0; t < tags.Length; t++)
            {
                tags[t] = TagMapper.getKaistTagOnLevel(tags[t], TAG_LEVEL);
                hasAdjacentDuplicate |= tags[t].Equals(previous);
                previous = tags[t];
            }

            if (hasAdjacentDuplicate)
            {
                // tagList / morphemeList are reusable scratch buffers declared outside this method.
                tagList.Clear();
                morphemeList.Clear();
                string[] morphemes = candidates[c].Morphemes;

                // Walk adjacent pairs: equal tags push the left morpheme's text into the
                // right one, otherwise the left element is final and gets emitted.
                for (int next = 1; next < tags.Length; next++)
                {
                    int current = next - 1;
                    if (tags[current].Equals(tags[next]))
                    {
                        morphemes[next] = morphemes[current] + morphemes[next];
                    }
                    else
                    {
                        tagList.Add(tags[current]);
                        morphemeList.Add(morphemes[current]);
                    }
                }

                // The last element is never emitted by the pair loop, so add it explicitly.
                tagList.Add(tags[tags.Length - 1]);
                morphemeList.Add(morphemes[morphemes.Length - 1]);

                candidates[c] = new Eojeol(morphemeList.ToArray(), tagList.ToArray());
            }

            // Keep only the first candidate for each distinct textual form.
            string signature = candidates[c].ToString();
            if (!dupFilterMap.ContainsKey(signature))
            {
                dupFilterMap[signature] = candidates[c];
            }
        }

        if (candidates.Length == dupFilterMap.Count)
        {
            // No duplicates were dropped: reuse the (updated) original array.
            simplifiedSets.Add(candidates);
        }
        else
        {
            // NOTE(review): the resulting order follows dupFilterMap.Values enumeration order;
            // confirm the map type yields insertion order if candidate order matters.
            simplifiedSets.Add(dupFilterMap.Values.ToArray());
        }
    }

    sos.setEojeolSetArray(simplifiedSets);
    return sos;
}