public string NormalizeSample()
        {
            // Demonstrates text normalization on a fixed sample sentence.
            // Expected result: "정규화(Normalize) 예제입니다~"
            const string sample = "정규화(Normalize) 예제입니당~";

            string normalized = TwitterKoreanProcessorCS.Normalize(sample);
            return normalized;
        }
        /// <summary>
        /// Tokenizes a fixed sample sentence, extracts its phrases and renders every
        /// phrase together with the tokens it is made of.
        /// </summary>
        /// <returns>
        /// For each phrase: a separator line followed by a line holding the phrase's
        /// part of speech and its tokens (text, part of speech, offset, length).
        /// </returns>
        public string ExtractPhraseSample1()
        {
            const string sentence    = "토큰화 처리 이후 어구를 추출하는 예제입니당ㅇㅇㅇ";
            const string tokenFormat = "{0}({1}) [{2},{3}] / ";

            var output    = new StringBuilder();
            var extracted = TwitterKoreanProcessorCS.ExtractPhrases(TwitterKoreanProcessorCS.Tokenize(sentence));

            foreach (var item in extracted)
            {
                output.AppendLine("---------");
                output.AppendFormat("{0} | ", item.Pos.ToString());

                foreach (var part in item.Tokens)
                {
                    output.AppendFormat(tokenFormat, part.Text, part.Pos.ToString(), part.Offset, part.Length);
                }

                output.AppendLine();
            }

            // Sample detail lines:
            // Noun | 토큰(ProperNoun) [0,2] /
            // Noun | 처리(Noun) [4,2] /
            // ...
            // Noun | 어구(Noun) [10,2] /
            return output.ToString();
        }
        /// <summary>
        /// Counts the proper nouns found in <c>Talks[starts .. ends)</c> and merges the
        /// per-slice counts into the shared <c>Words</c> dictionary.
        /// </summary>
        /// <param name="starts">Index of the first talk to process (inclusive).</param>
        /// <param name="ends">Index one past the last talk to process (exclusive).</param>
        private void samafc_internal(int starts, int ends)
        {
            // Progress is reported about every 1% of the slice. For slices shorter than
            // 100 items this step is 0, which must not be used as a modulo divisor.
            var cc     = (ends - starts) / 100;
            var mwords = new Dictionary <string, int>();

            for (int i = starts; i < ends; i++)
            {
                try
                {
                    if (Talks[i].State == TalkState.Message || Talks[i].State == TalkState.Append)
                    {
                        var tokens = TwitterKoreanProcessorCS.Tokenize(Talks[i].Content);
                        var stem   = TwitterKoreanProcessorCS.Stem(tokens);

                        foreach (var word in stem)
                        {
                            // Only proper nouns are counted (plain KoreanPos.Noun is currently excluded).
                            if (word.Pos == KoreanPos.ProperNoun)
                            {
                                int seen;
                                mwords.TryGetValue(word.Text, out seen); // seen stays 0 for a new word
                                mwords[word.Text] = seen + 1;
                            }
                        }
                    }
                }
                catch (Exception e)
                {
                    // A talk that cannot be analyzed is logged and skipped.
                    Console.Console.Instance.WriteLine(Talks[i].Content + "\r\n" + e.Message);
                }

                var x = System.Threading.Interlocked.Increment(ref cnt);

                // Report on every step boundary and on the last item of the slice.
                if (cc == 0 || (i % cc == 0) || i == ends - 1)
                {
                    lock (cpb)
                    {
                        cpb.SetProgress(x / (float)Talks.Count * 100);
                    }
                }
            }

            lock (Words)
            {
                foreach (var ww in mwords)
                {
                    if (!Words.ContainsKey(ww.Key))
                    {
                        // Carry over the full local count, not just a single occurrence.
                        Words.Add(ww.Key, ww.Value);
                    }
                    else
                    {
                        Words[ww.Key] += ww.Value;
                    }
                }
            }
        }
        /// <summary>
        /// Tokenizes a fixed sample sentence and joins the token strings with " / ".
        /// </summary>
        public string TokensToStringsSample1()
        {
            const string sentence = "토큰화를 처리하는 예제입니다. 문자열화는 덤";

            var pieces = TwitterKoreanProcessorCS.TokensToStrings(TwitterKoreanProcessorCS.Tokenize(sentence));

            // Expected: 토큰 / 화 / 를 / 처리 / 하는 / 예제 / 입니 / 다 / . / 문자열 / 화 / 는 / 덤
            string joined = string.Join(" / ", pieces);
            return joined;
        }
        // Tokenization
        /// <summary>
        /// Tokenizes the <c>text</c> member and joins the token strings with "/".
        /// </summary>
        /// <returns>The token strings separated by "/".</returns>
        public string TokenizeSample()
        {
            var tokens  = TwitterKoreanProcessorCS.Tokenize(text);
            var results = TwitterKoreanProcessorCS.TokensToStrings(tokens);

            return(string.Join("/", results));
        }
        /// <summary>
        /// Runs sentiment analysis on a Korean input string.
        /// The results accumulate until the next GetSentimentAndFlush() call.
        /// </summary>
        /// <param name="input">Korean text.</param>
        public override void Analyze(string input)
        {
            if (!HasStart)
            {
                return;
            }

            string assembled  = Hangul.Assemble(input, true);
            string normalized = TwitterKoreanProcessorCS.Normalize(assembled);

            var stemmed = TwitterKoreanProcessorCS.Stem(TwitterKoreanProcessorCS.Tokenize(normalized));

            // NOTE(review): SkipWhile only drops the leading tokens that are not one of these
            // parts of speech; later tokens of other kinds are kept. Confirm that a full
            // filter (Where) was not intended.
            var trimmed = stemmed.SkipWhile((e) => !(
                                e.Pos == KoreanPos.Adjective ||
                                e.Pos == KoreanPos.Adverb ||
                                e.Pos == KoreanPos.Exclamation ||
                                e.Pos == KoreanPos.Noun ||
                                e.Pos == KoreanPos.NounPrefix ||
                                e.Pos == KoreanPos.Verb ||
                                e.Pos == KoreanPos.VerbPrefix
                                ));

            // Materialize once so the token pipeline is not enumerated twice.
            List <string> tokens = TwitterKoreanProcessorCS.TokensToStrings(trimmed).ToList();
            int           count  = tokens.Count;

            // Adds one unit of weight to every aggregate bucket of the given sentiment entry.
            void UpdateAggregate(object[] args)
            {
                KoreanWordSentiment sentiment = (KoreanWordSentiment)args[0];
                int weight = 1;

                aggregatePolarity[(int)sentiment.Polarity]   += weight;
                aggregateIntensity[(int)sentiment.Intensity] += weight;
                aggregateSubjectivityType[(int)sentiment.SubjectivityType]         += weight;
                aggregateSubjectivityPolarity[(int)sentiment.SubjectivityPolarity] += weight;
            }

            // Look up every n-gram (n = 1 .. NUM_OF_GRAMS); dictionary keys join tokens with ';'.
            for (int j = 1; j <= NUM_OF_GRAMS; j++)
            {
                for (int i = 0; i < count - j + 1; i++)
                {
                    string word = String.Join(";", tokens.GetRange(i, j));
                    Console.WriteLine(word);
                    if (koreanSentimentDictionary[j - 1].TryGetValue(word, out var matched))
                    {
                        Util.TaskQueue.Add("aggregateKoreanSentiment", UpdateAggregate,
                                           matched);
                    }
                }
            }
        }
        /// <summary>
        /// Tokenizes a fixed sample sentence and lists every token with its part of
        /// speech, offset and length.
        /// </summary>
        public string TokenizeSample1()
        {
            var buffer = new StringBuilder();

            foreach (var piece in TwitterKoreanProcessorCS.Tokenize("토큰화를 처리하는 예제입니다"))
            {
                string rendered = string.Format("{0}({1}) [{2},{3}] / ",
                                                piece.Text, piece.Pos.ToString(), piece.Offset, piece.Length);
                buffer.Append(rendered);
            }

            // Expected: 토큰(ProperNoun) [0,2] / 화(Suffix) [2,1] / 를(Josa) [3,1] /  ... / 입니(Adjective) [12,2] / 다(Eomi) [14,1] /
            return buffer.ToString();
        }
        /// <summary>
        /// Tokenizes <paramref name="input"/> and returns the text of every token whose
        /// part of speech is "Noun", each followed by a single space.
        /// </summary>
        /// <param name="input">Text to tokenize.</param>
        /// <returns>The noun texts, space-terminated; empty when there are none.</returns>
        public String Tokenize(String input)
        {
            StringBuilder nouns = new StringBuilder();

            foreach (var candidate in TwitterKoreanProcessorCS.Tokenize(input))
            {
                bool isNoun = candidate.Pos.ToString() == "Noun";
                if (!isNoun)
                {
                    continue;
                }

                nouns.Append(candidate.Text).Append(" ");
            }

            return nouns.ToString();
        }
        // Stemming
        /// <summary>
        /// Tokenizes and stems the <c>text</c> member, then lists every stemmed token
        /// with its part of speech, offset and length.
        /// </summary>
        public string StemSample()
        {
            const string pattern = "{0}({1}) [{2},{3}] / ";

            var stems  = TwitterKoreanProcessorCS.Stem(TwitterKoreanProcessorCS.Tokenize(text));
            var buffer = new StringBuilder();

            foreach (var stem in stems)
            {
                buffer.AppendFormat(pattern, stem.Text, stem.Pos.ToString(), stem.Offset, stem.Length);
            }

            return buffer.ToString();
        }
        /// <summary>
        /// Tokenizes and stems a fixed sample sentence, then lists every stemmed token
        /// with its part of speech, offset and length.
        /// </summary>
        public string StemSample1()
        {
            const string sentence = "토큰화 이후 어근화를 처리하는 예제입니다";

            var buffer = new StringBuilder();
            var stems  = TwitterKoreanProcessorCS.Stem(TwitterKoreanProcessorCS.Tokenize(sentence));

            foreach (var stem in stems)
            {
                string rendered = string.Format("{0}({1}) [{2},{3}] / ",
                                                stem.Text, stem.Pos.ToString(), stem.Offset, stem.Length);
                buffer.Append(rendered);
            }

            // Expected: 토큰(ProperNoun) [0,2] / 화(Suffix) [2,1] /  (Space) [3,1] / 이후(Noun) [4,2] / ... / 예제(Noun) [17,2] / 이다(Adjective) [19,3] /
            return buffer.ToString();
        }
示例#11
0
        /// <summary>
        /// Tokenizes <paramref name="saysth"/> and joins the token strings with " / ".
        /// The literal input "종료" is answered with a fixed farewell instead.
        /// </summary>
        /// <param name="saysth">Text to tokenize, or "종료".</param>
        public static string TokenTosTringSample(string saysth)
        {
            // "종료" is treated as the exit keyword and is never tokenized.
            if (saysth == "종료")
            {
                return "안녕히 가세요";
            }

            var pieces = TwitterKoreanProcessorCS.TokensToStrings(TwitterKoreanProcessorCS.Tokenize(saysth));
            return string.Join(" / ", pieces);
        }
        /// <summary>
        /// Loads the sentiment dictionaries and initializes the sentiment analyzer.
        /// Does nothing when an instance has already been registered.
        /// </summary>
        public override void Initialize()
        {
            if (!(instance is null))
            {
                return;
            }
            instance = this;

            #region Load TwitterKoreanProcessor (morpheme analysis engine)
            // Runs the full pipeline once on a background task and discards the result
            // (presumably to warm the engine up before the first real request).
            System.Threading.Tasks.Task.Run(() =>
            {
                var warmup = TwitterKoreanProcessorCS.Tokenize(TwitterKoreanProcessorCS.Normalize("초기화"));
                warmup = TwitterKoreanProcessorCS.Stem(warmup);
                TwitterKoreanProcessorCS.TokensToStrings(warmup);
            });
            #endregion

            #region Load Korean sentiment dictionaries

            /*
             * HangulSentiment1.csv is created by MorphemeParser(https://github.com/salt26/morpheme-parser).
             * The original data source is Korean Sentiment Lexicon(http://word.snu.ac.kr/kosac/lexicon.php).
             */
            hangulSentimentCSV        = new List <Util.CSVReader>();
            koreanSentimentDictionary = new List <Dictionary <string, KoreanWordSentiment> >();

            // One CSV file and one dictionary per n-gram size: HangulSentiment1.csv, HangulSentiment2.csv, ...
            for (int gram = 1; gram <= NUM_OF_GRAMS; gram++)
            {
                var reader = new Util.CSVReader("HangulSentiment" + gram + ".csv", true);
                hangulSentimentCSV.Add(reader);

                var lexicon = new Dictionary <string, KoreanWordSentiment>();
                koreanSentimentDictionary.Add(lexicon);

                // Column 0 is the dictionary key; columns 0-4 feed the sentiment entry.
                foreach (List <string> row in reader.GetData())
                {
                    var entry = new KoreanWordSentiment(row[0], row[1], row[2], row[3], row[4]);
                    lexicon.Add(row[0], entry);
                }
            }

            Util.TaskQueue.Add("aggregateKoreanSentiment", InitializeAggregate);
            #endregion

            IsReady = true;
        }
        /// <summary>
        /// Tokenizes the <c>text</c> member, extracts its phrases and renders every
        /// phrase together with the tokens it is made of.
        /// </summary>
        /// <returns>
        /// For each phrase: a separator line followed by a line holding the phrase's
        /// part of speech and its tokens (text, part of speech, offset, length).
        /// </returns>
        public string ExtractPhraseSample1()
        {
            const string tokenFormat = "{0}({1}) [{2},{3}] / ";

            var extracted = TwitterKoreanProcessorCS.ExtractPhrases(TwitterKoreanProcessorCS.Tokenize(text));
            var output    = new StringBuilder();

            foreach (var item in extracted)
            {
                output.AppendLine("---------");
                output.AppendFormat("{0} | ", item.Pos.ToString());

                foreach (var part in item.Tokens)
                {
                    output.AppendFormat(tokenFormat, part.Text, part.Pos.ToString(), part.Offset, part.Length);
                }

                output.AppendLine();
            }

            return output.ToString();
        }
示例#14
0
        /// <summary>
        /// Reads a chat log line by line, parses each line into a <c>Line</c> and appends
        /// it to <c>chatLineArray</c>.
        /// </summary>
        /// <remarks>
        /// The parser reads the hour, minute and second from fixed columns of the first
        /// 14 characters, the name between the first space and the first ':', and the chat
        /// text after that ':' (presumably a "[hh:mm:ss] name: text" layout — confirm
        /// against real logs). Any error stops the read and prints the exception message.
        /// </remarks>
        /// <param name="filename">Path of the chat log file.</param>
        static void ReadFile(string filename)
        {
            try
            {
                // "using" guarantees the file handle is released even when a line fails to parse.
                using (StreamReader sr = new StreamReader(filename))
                {
                    string strLine;
                    while ((strLine = sr.ReadLine()) != null)
                    {
                        string time      = strLine.Substring(0, 14);
                        string noTimeStr = strLine.Substring(strLine.IndexOf(' '));

                        string name = noTimeStr.Substring(1, noTimeStr.IndexOf(':') - 1);
                        string chat = noTimeStr.Substring(noTimeStr.IndexOf(':') + 2);

                        // Normalize example: 재밌닼ㅋㅋㅋㅋ -> 재밌다ㅋㅋ
                        // (repeated 'ㅋ' is cut down to two and a 'ㅋ' used as a final consonant is removed)
                        chat = TwitterKoreanProcessorCS.Normalize(chat);

                        int hour = int.Parse(time.Substring(1, 2));
                        int min  = int.Parse(time.Substring(4, 2));
                        int sec  = int.Parse(time.Substring(7, 2));

                        Line chatLine = new Line(hour, min, sec, name, chat);

                        chatLineArray.Add(chatLine);
                    }
                }
            }

            catch (Exception ee)
            {
                Console.WriteLine(ee.Message);
            }


            //for(int i=0;i< chatLineArray.Count;i++)
            //{
            //    Console.WriteLine(chatLineArray[i].ToString());
            //}
        }
示例#15
0
        } //  구글 파싱

        /// <summary>
        /// Turns a natural-language search phrase into a query string: the phrase is
        /// normalized, tokenized and stemmed, only search-worthy tokens are kept, and the
        /// keywords "제외" and "혹은" are rewritten into the operators '-' and '|'.
        /// </summary>
        /// <remarks>
        /// "X 제외" becomes " -X" and "A 혹은 B" becomes "A|B". A keyword that has no
        /// operand next to it is kept as an ordinary term instead of throwing.
        /// </remarks>
        /// <param name="input_Search_Data">Raw search phrase.</param>
        /// <returns>The kept terms concatenated without a separator.</returns>
        public string StemSample1(string input_Search_Data)
        {
            var tokens = TwitterKoreanProcessorCS.Tokenize(
                TwitterKoreanProcessorCS.Normalize(input_Search_Data));
            var stemmedTokens = TwitterKoreanProcessorCS.Stem(tokens);

            // Keep only the parts of speech that can act as search terms.
            List <string> list = new List <string>();

            foreach (var stemmedToken in stemmedTokens)
            {
                string pos = stemmedToken.Pos.ToString();

                if (pos.Contains("Noun") || pos == "Alpha" || pos == "Punctuation" || pos == "Adverb" || pos == "Number")
                {
                    list.Add(stemmedToken.Text);
                }
            }

            List <string> second = new List <string>();

            for (int i = 0; i < list.Count; i++)
            {
                // The operators always act on the most recently emitted term, so address it
                // by its own position instead of assuming "second" stays in step with "list".
                int last = second.Count - 1;

                if (list[i].Contains("제외") && last >= 0)
                {
                    // Exclusion: prefix the previous term with " -".
                    second[last] = " -" + second[last];
                }
                else if (list[i].Contains("혹은") && last >= 0 && i + 1 < list.Count)
                {
                    // Alternation: merge the previous term with the next one.
                    second[last] = second[last] + "|" + list[i + 1];
                    i++; // the right-hand operand has been consumed
                }
                else
                {
                    second.Add(list[i]);
                }
            }

            return string.Join("", second);
        } // Naver parsing
示例#16
0
        ////////////////////////////////////////////////////////////////
        // Morpheme analysis
        ////////////////////////////////////////////////////////////////
        /// <summary>
        /// Analyzes a chat message and decides how to react to it.
        /// The checks run in this order: function command, video lookup, menu lookup,
        /// off-work reply, weather, Athena info and finally an occasional small-talk reply.
        /// </summary>
        /// <param name="message">Raw chat message.</param>
        /// <param name="list">Earlier messages, used for the menu lookup and for small talk.</param>
        /// <param name="isMainRoom">Small talk is only produced when this is true.</param>
        /// <returns>
        /// (Item1, Item2, Item3): command results carry the command in Item1, its argument
        /// in Item2 and true in Item3; plain replies carry the reply text in Item1, "" in
        /// Item2 and false in Item3. ("", "", false) means "no reaction".
        /// </returns>
        public Tuple <string, string, bool> morphemeProcessor(string message, List <CMessage> list, bool isMainRoom)
        {
            Tuple <string, string, bool> emptyTuple = Tuple.Create("", "", false);

            string normalize = TwitterKoreanProcessorCS.Normalize(message);
            var    morpheme  = TwitterKoreanProcessorCS.Tokenize(normalize);

            //--------------------------------------------------------------------------
            // Clan function detection
            //--------------------------------------------------------------------------
            // First word is the command, the remaining words (space-joined) are its contents.
            string[] natural  = FunctionCommand(normalize).Split(' ');
            string   command  = natural[0];
            string   contents = string.Join(" ", natural, 1, natural.Length - 1);

            if (command != "")
            {
                return(Tuple.Create(command, contents, true));
            }

            //--------------------------------------------------------------------------
            // Video lookup
            //--------------------------------------------------------------------------
            if (normalize.Contains("영상") || normalize.Contains("방송"))
            {
                bool isVideo = false;

                foreach (var word in enterCommand)
                {
                    if (normalize.Contains(word))
                    {
                        isVideo = true;
                        break;
                    }
                }

                if (isVideo)
                {
                    DateTime now = System.DateTime.Now;

                    if (normalize.Contains("오늘") || normalize.Contains("어제"))
                    {
                        // Before 06:00 the previous calendar day still counts as "today".
                        // Shifting the clock by six hours also rolls month and year back
                        // correctly, which plain "Day - 1" arithmetic does not.
                        DateTime target = now.AddHours(-6);

                        if (!normalize.Contains("오늘"))
                        {
                            target = target.AddDays(-1); // "yesterday"
                        }

                        string date = target.Year.ToString("D4") + target.Month.ToString("D2") + target.Day.ToString("D2");

                        return(Tuple.Create("/영상", date, true));
                    }

                    // No day mentioned: look up the current month.
                    return(Tuple.Create("/영상", now.Year.ToString("D4") + now.Month.ToString("D2"), true));
                }
            }

            //--------------------------------------------------------------------------
            // Menu lookup
            //--------------------------------------------------------------------------
            Tuple <bool, bool> existMenu = isExistMenu(message, list);

            if (existMenu.Item1 == true)
            {
                return(Tuple.Create(getMenu(message, existMenu.Item2), "", false));
            }

            //--------------------------------------------------------------------------
            // Off-work reply
            //--------------------------------------------------------------------------
            if (message.Contains("퇴근"))
            {
                string offWork = offWorkCall(message);
                if (offWork != "")
                {
                    return(Tuple.Create(offWork, "", false));
                }
            }

            //--------------------------------------------------------------------------
            // Weather detection
            //--------------------------------------------------------------------------
            if (message.Contains("날씨"))
            {
                Tuple <string, string> weatherTuple = weatherCall(message);
                if (weatherTuple.Item1 != "" && weatherTuple.Item2 != "")
                {
                    return(Tuple.Create("/날씨", weatherTuple.Item2, true));
                }
            }

            //--------------------------------------------------------------------------
            // Athena info
            //--------------------------------------------------------------------------
            if (message.Contains("아테나"))
            {
                string athenaInfo = AthenaInfo(message);
                if (athenaInfo != "")
                {
                    return(Tuple.Create(athenaInfo, "", false));
                }
            }

            //--------------------------------------------------------------------------
            // Everything else: small talk
            //--------------------------------------------------------------------------
            // Outside the main room nothing more is said.
            if (isMainRoom == false)
            {
                return(emptyTuple);
            }

            // One generator serves every draw below.
            Random random = new Random(unchecked ((int)DateTime.Now.Ticks));

            // Answer with a probability of 1/20.
            if (random.Next(20) != 1)
            {
                return(emptyTuple);
            }

            foreach (var word in morpheme)
            {
                // Unknown word
                if (word.Unknown == true)
                {
                    switch (random.Next(3))
                    {
                    case 0:
                        return(Tuple.Create(word.Text.ToString() + "? 그게 무슨 말이에요?", "", false));

                    case 1:
                        return(Tuple.Create(word.Text.ToString() + "? 처음 듣는 말이네요.", "", false));

                    case 2:
                        return(Tuple.Create(word.Text.ToString() + "? 무슨 말인지 모르겠어요.", "", false));
                    }
                }

                // Exclamation
                if (word.Pos.ToString() == "Exclamation")
                {
                    switch (random.Next(3))
                    {
                    case 0:
                        return(Tuple.Create("와, 정말 놀랍네요.", "", false));

                    case 1:
                        return(Tuple.Create("저도 놀라워요.", "", false));

                    case 2:
                        return(Tuple.Create("대박이네요.", "", false));
                    }
                }
            }

            // Pick one noun of the message at random as the topic to mention.
            List <string> nounTexts = new List <string>();

            foreach (var word in morpheme)
            {
                string pos = word.Pos.ToString();

                if (pos == "Noun" || pos == "ProperNoun")
                {
                    nounTexts.Add(word.Text.ToString());
                }
            }

            if (nounTexts.Count == 0)
            {
                return(emptyTuple);
            }

            string mention = nounTexts[random.Next(nounTexts.Count)];

            if (mention == "")
            {
                return(emptyTuple);
            }

            // Refer back to an earlier message that contains the same word.
            foreach (var queMsg in list)
            {
                DateTime current = System.DateTime.Now;

                // NOTE(review): this compares the minute-of-hour only, so an old message sent
                // at a later minute is skipped as well — confirm the intended rule.
                if (queMsg.Time.Minute >= current.Minute)
                {
                    continue;
                }

                if (!queMsg.Message.Contains(mention))
                {
                    continue;
                }

                bool sameDay = (queMsg.Time.Year == current.Year) &&
                               (queMsg.Time.Month == current.Month) &&
                               (queMsg.Time.Day == current.Day);

                // "아까" for a message from today, "저번에" for an older one.
                string time = sameDay ? "아까" : "저번에";

                switch (random.Next(5))
                {
                case 0:
                    return(Tuple.Create(time + " " + mention + "에 대해서 말씀하신 적 있어요. 관심있으신가봐요.", "", false));

                case 1:
                    return(Tuple.Create(time + " 말씀하신 " + mention + " 어떤가요?", "", false));

                case 2:
                    return(Tuple.Create(time + " " + mention + "에 대해 비슷한 말씀을 하셨었죠.", "", false));

                case 3:
                    return(Tuple.Create("자주 언급을 하시니 저도 " + mention + "에 대해서 관심을 가져볼까 해요.", "", false));

                case 4:
                    return(Tuple.Create("아, " + mention + "에 대해서 " + time + " 말씀하셨었어요. 흥미롭네요.", "", false));
                }
            }

            // Plain mention
            switch (random.Next(5))
            {
            case 0:
                return(Tuple.Create(mention + " 좋아하시나봐요.", "", false));

            case 1:
                return(Tuple.Create(mention + ", 저도 궁금하네요.", "", false));

            case 2:
                return(Tuple.Create(mention + " 어때요?", "", false));

            case 3:
                return(Tuple.Create(mention + " 좋나요?", "", false));

            case 4:
                return(Tuple.Create(mention + ". 흥미롭네요.", "", false));
            }

            return(emptyTuple);
        }
        /// <summary>
        /// Counts words per month (key = year * 100 + month) for the talks in
        /// <c>Talks[starts .. ends)</c> that pass <paramref name="filter"/>, then merges
        /// the counts into the shared <c>DateWords</c> dictionary.
        /// </summary>
        /// <param name="starts">Index of the first talk to process (inclusive).</param>
        /// <param name="ends">Index one past the last talk to process (exclusive).</param>
        /// <param name="filter">Only talks for which this returns true are counted.</param>
        /// <param name="progress">Receives the running value of the shared counter.</param>
        /// <param name="using_tokenizer">True: count stemmed tokens; false: split on spaces.</param>
        /// <param name="include_noun">With the tokenizer, also count plain nouns besides proper nouns.</param>
        /// <param name="ignore_symbols">Not read by this method.</param>
        /// <param name="ignore_numeric">Not read by this method.</param>
        private void samaf_internal_by_date(int starts, int ends, Func <Talk, bool> filter, Action <int> progress, bool using_tokenizer, bool include_noun, bool ignore_symbols, bool ignore_numeric)
        {
            int reportStep = (ends - starts) / 100;
            var perMonth   = new Dictionary <int, Dictionary <string, int> >();

            for (int i = starts; i < ends; i++)
            {
                bool countable = Talks[i].State == TalkState.Message || Talks[i].State == TalkState.Append;

                if (countable)
                {
                    // A filtered-out talk skips the counter and the progress report as well.
                    if (!filter(Talks[i]))
                    {
                        continue;
                    }

                    int monthKey = Talks[i].Time.Year * 100 + Talks[i].Time.Month;

                    Dictionary <string, int> bucket;
                    if (!perMonth.TryGetValue(monthKey, out bucket))
                    {
                        bucket = new Dictionary <string, int>();
                        perMonth.Add(monthKey, bucket);
                    }

                    if (using_tokenizer)
                    {
                        try
                        {
                            var stems = TwitterKoreanProcessorCS.Stem(TwitterKoreanProcessorCS.Tokenize(Talks[i].Content));

                            foreach (var word in stems)
                            {
                                bool wanted = word.Pos == KoreanPos.ProperNoun || (include_noun && word.Pos == KoreanPos.Noun);
                                if (!wanted)
                                {
                                    continue;
                                }

                                int seen;
                                bucket.TryGetValue(word.Text, out seen); // seen stays 0 for a new word
                                bucket[word.Text] = seen + 1;
                            }
                        }
                        catch (Exception)
                        {
                            // Talks that fail to tokenize or stem are skipped silently.
                        }
                    }
                    else
                    {
                        foreach (var piece in Talks[i].Content.Split(' '))
                        {
                            // Pieces that parse as a number are not counted.
                            double parsed;
                            if (double.TryParse(piece, out parsed))
                            {
                                continue;
                            }

                            var str = piece.Trim();
                            if (str == "")
                            {
                                continue;
                            }

                            // Strip one trailing punctuation mark.
                            if ("?!.,".Contains(str.Last()))
                            {
                                str = str.Remove(str.Length - 1);
                            }
                            if (str == "")
                            {
                                continue;
                            }

                            int seen;
                            bucket.TryGetValue(str, out seen);
                            bucket[str] = seen + 1;
                        }
                    }
                }

                var x = System.Threading.Interlocked.Increment(ref cnt);

                // Report about every 1% of the slice (every item when the slice is short).
                if (reportStep == 0 || (i % reportStep == 0))
                {
                    progress(x);
                }
            }

            progress(cnt);

            lock (DateWords)
            {
                foreach (var month in perMonth)
                {
                    if (!DateWords.ContainsKey(month.Key))
                    {
                        DateWords.Add(month.Key, new Dictionary <string, int>());
                    }

                    var target = DateWords[month.Key];

                    foreach (var entry in month.Value)
                    {
                        if (target.ContainsKey(entry.Key))
                        {
                            target[entry.Key] += entry.Value;
                        }
                        else
                        {
                            target.Add(entry.Key, entry.Value);
                        }
                    }
                }
            }
        }
示例#18
0
        Dictionary <String, int> totalNoun;                              // Occurrence count of each noun across the whole content (type Dictionary<String,int>)

        /// <summary>
        /// Splits a chat log into fixed-length time sections and gathers token statistics,
        /// both per section and for the whole log (totalWord, totalPosInfo, totalNoun).
        /// </summary>
        /// <param name="_interval">Length of one section in seconds. Used as a divisor, so it must not be zero.</param>
        /// <param name="chatArray">Chat lines (Line objects). The number of sections is derived from the
        /// timestamp of the last element, so a line with a later timestamp than the last one would index
        /// past the end of the section list.</param>
        public void GenerateSections(int _interval, ArrayList chatArray)
        {
            interval     = _interval;
            sectionArray = new ArrayList();
            totalWord    = new Dictionary <string, int>();
            totalPosInfo = new Dictionary <string, int>();
            totalNoun    = new Dictionary <string, int>();

            // An empty log has no last line to take the end time from; leave all
            // collections freshly initialised instead of indexing element -1.
            if (chatArray.Count == 0)
            {
                return;
            }

            Line lastLine   = (Line)chatArray[chatArray.Count - 1];
            int  endLineSec = lastLine.hour * 3600 + lastLine.min * 60 + lastLine.sec;

            for (int i = 0; i <= endLineSec / interval; i++)
            {
                sectionArray.Add(new Section(i));
            }

            int chatLineCount = chatArray.Count;

            for (int i = 0; i < chatLineCount; i++)
            {
                Line line     = (Line)chatArray[i];
                int  totalSec = line.hour * 3600 + line.min * 60 + line.sec;

                // Every statistic of this line goes into the section its timestamp falls in.
                Section section = (Section)sectionArray[totalSec / interval];

                section.count++;
                section.addId(line.name);

                var tokens = TwitterKoreanProcessorCS.Tokenize(line.chat);
                try
                {
                    tokens = TwitterKoreanProcessorCS.Stem(tokens); // convert to the base (dictionary) form
                }
                catch (Exception)
                {
                    // Deliberate best effort: if stemming fails, keep the unstemmed tokens.
                    // The cause is unknown; e.g. the sentence '어 왜 벌써 61명남음?' cannot be stemmed.
                }

                foreach (var token in tokens)
                {
                    section.addToken(token.Text, token.Pos);

                    IncrementCount(totalWord, token.Text);
                    IncrementCount(totalPosInfo, token.Pos.ToString());

                    if (token.Pos == KoreanPos.Noun)
                    {
                        section.noun_count++;
                        IncrementCount(totalNoun, token.Text);
                    }

                    // The literal "ㅋㅋ" token takes precedence over its part of speech;
                    // otherwise at most one of the per-POS counters is bumped.
                    if (token.Text.Equals("ㅋㅋ"))
                    {
                        section.kk_count++;
                    }
                    else if (token.Pos == KoreanPos.Space)
                    {
                        section.Space_count++;
                    }
                    else if (token.Pos == KoreanPos.KoreanParticle)
                    {
                        section.KoreanParticle_count++;
                    }
                    else if (token.Pos == KoreanPos.ProperNoun)
                    {
                        section.ProperNoun_count++;
                    }
                    else if (token.Pos == KoreanPos.Verb)
                    {
                        section.Verb_count++;
                    }
                    else if (token.Pos == KoreanPos.Josa)
                    {
                        section.Josa_count++;
                    }
                    else if (token.Pos == KoreanPos.Punctuation)
                    {
                        section.Punctuation_count++;
                    }
                    else if (token.Pos == KoreanPos.Alpha)
                    {
                        section.Alpha_count++;
                    }
                    else if (token.Pos == KoreanPos.Number)
                    {
                        section.Number_count++;
                    }
                }
            }
        }

        // Adds one to the counter stored under key, starting from zero when the key is new.
        private static void IncrementCount(IDictionary<string, int> counts, string key)
        {
            int current;

            counts.TryGetValue(key, out current);
            counts[key] = current + 1;
        }
        /// <summary>
        /// Runs the Korean text normalizer over the <c>text</c> member (declared outside this method).
        /// </summary>
        /// <returns>The value returned by <c>TwitterKoreanProcessorCS.Normalize</c>.</returns>
        public string NormalizeSample()
        {
            return TwitterKoreanProcessorCS.Normalize(text);
        }
示例#20
0
        } // 삭제하기

        //-----------------------< /region button >------------------------//
        #endregion /Button

        #region Parsing_Data
        //-----------------------< parsing data >-----------------------------//
        /// <summary>
        /// Turns a free-form Korean search request into a query string that uses search-engine
        /// operators (exclusion, OR, numeric range, filetype:, site:, hashtag).
        /// </summary>
        /// <param name="input_Search_Data">Raw text typed by the user.</param>
        /// <returns>
        /// The rewritten terms concatenated into one string. Plain terms after the first one are
        /// prefixed with "+", excluded terms carry their own " -" prefix. Empty when no usable
        /// token is found.
        /// </returns>
        /// <remarks>
        /// Marker words ("제외", "혹은", "~", "형식", "해시태그") always act on the term emitted
        /// immediately before them. A marker that lacks the operand it needs (for example at the
        /// very start or end of the input) is dropped instead of raising an index error.
        /// </remarks>
        public string StemSample2(string input_Search_Data)
        {
            var tokens = TwitterKoreanProcessorCS.Tokenize(
                TwitterKoreanProcessorCS.Normalize(input_Search_Data));
            var stemmedTokens = TwitterKoreanProcessorCS.Stem(tokens);

            // Keep only the parts of speech that can be a search term or a marker word.
            List<string> words = new List<string>();

            foreach (var stemmedToken in stemmedTokens)
            {
                string pos = stemmedToken.Pos.ToString();

                if (pos.Contains("Noun") || pos == "Alpha" || pos == "Punctuation" || pos == "Adverb" || pos == "Number")
                {
                    words.Add(stemmedToken.Text);
                }
            }

            List<string> terms = ApplyOperatorWords(words);

            terms = ApplyFileTypeWords(terms);
            terms = ApplySiteWords(terms);
            terms = ApplyHashtagWords(terms);

            return(AssembleQuery(terms));
        } //  Google query parsing

        // Rewrites "X 제외" (exclude) to " -X", "X 혹은 Y" (or) to "XORY" and "X ~ Y" to "X..Y".
        private static List<string> ApplyOperatorWords(List<string> words)
        {
            List<string> terms = new List<string>();

            for (int i = 0; i < words.Count; i++)
            {
                string word = words[i];
                int    last = terms.Count - 1;

                if (word.Contains("제외"))
                {
                    // Negate the preceding term in place; with nothing before it the marker is dropped.
                    if (last >= 0)
                    {
                        terms[last] = " -" + terms[last];
                    }
                }
                else if (word.Contains("혹은") || word.Contains("~"))
                {
                    string joiner = word.Contains("혹은") ? "OR" : "..";

                    // Needs a term on both sides; otherwise the marker is dropped.
                    if (last >= 0 && i + 1 < words.Count)
                    {
                        terms[last] = terms[last] + joiner + words[i + 1];
                        i++; // the right-hand operand has been consumed
                    }
                }
                else
                {
                    terms.Add(word);
                }
            }

            return terms;
        }

        // Rewrites "<ext> 형식" (format) to "filetype:<ext>" for ppt, pdf, xls and doc.
        private static List<string> ApplyFileTypeWords(List<string> terms)
        {
            string[]     extensions = { "ppt", "pdf", "xls", "doc" };
            List<string> result     = new List<string>();

            foreach (string term in terms)
            {
                if (!term.Contains("형식"))
                {
                    result.Add(term);
                    continue;
                }

                // The marker itself is never emitted. If the preceding term names a known
                // extension it becomes the filetype filter, otherwise it is left as it is.
                int last = result.Count - 1;

                if (last < 0)
                {
                    continue;
                }

                foreach (string extension in extensions)
                {
                    if (result[last].Contains(extension))
                    {
                        result[last] = "filetype:" + extension;
                        break;
                    }
                }
            }

            return result;
        }

        // Replaces the Korean names of well-known sites with the matching "site:" filter.
        private static List<string> ApplySiteWords(List<string> terms)
        {
            List<string> result = new List<string>();

            foreach (string term in terms)
            {
                switch (term)
                {
                    case "인스타그램": // Instagram
                        result.Add("site:www.instagram.com");
                        break;

                    case "네이버": // Naver
                        result.Add("site:www.naver.com");
                        break;

                    case "페이스북": // Facebook
                        result.Add("site:www.facebook.com");
                        break;

                    case "구글": // Google
                        result.Add("site:www.google.com");
                        break;

                    case "유튜브": // YouTube
                        result.Add("site:www.youtube.com");
                        break;

                    default:
                        result.Add(term);
                        break;
                }
            }

            return result;
        }

        // Rewrites "X 해시태그" (hashtag) to "%23X", i.e. X behind a URL-encoded '#'.
        private static List<string> ApplyHashtagWords(List<string> terms)
        {
            List<string> result = new List<string>();

            foreach (string term in terms)
            {
                if (term == "해시태그")
                {
                    int last = result.Count - 1;

                    if (last >= 0)
                    {
                        result[last] = "%23" + result[last];
                    }
                }
                else
                {
                    result.Add(term);
                }
            }

            return result;
        }

        // Orders the terms (plain, OR, exclusions, site/filetype filters) and concatenates them.
        private static string AssembleQuery(List<string> terms)
        {
            List<string> plain        = new List<string>();
            List<string> alternatives = new List<string>();
            List<string> exclusions   = new List<string>();
            List<string> filters      = new List<string>();

            foreach (string term in terms)
            {
                if (term.Contains("-"))
                {
                    exclusions.Add(term);
                }
                // NOTE(review): the original tested "-" a second time here, so this bucket was
                // never filled. "OR" is the joiner produced by the "혹은" rewrite and is assumed
                // to be the intended marker; confirm. Only the order of the terms is affected.
                else if (term.Contains("OR"))
                {
                    alternatives.Add(term);
                }
                else if (term.Contains("site") || term.Contains("filetype"))
                {
                    filters.Add(term);
                }
                else
                {
                    plain.Add(term);
                }
            }

            List<string> ordered = new List<string>();

            ordered.AddRange(plain);
            ordered.AddRange(alternatives);
            ordered.AddRange(exclusions);
            ordered.AddRange(filters);

            List<string> pieces = new List<string>();

            for (int i = 0; i < ordered.Count; i++)
            {
                string term = ordered[i];

                // The first term, exclusions (which carry their own " -" prefix) and terms
                // containing '#' are emitted verbatim; everything else is joined with '+'.
                if (i == 0 || term.Contains("-") || term.Contains("#"))
                {
                    pieces.Add(term);
                }
                else
                {
                    pieces.Add("+" + term);
                }
            }

            return string.Join("", pieces);
        }
示例#21
0
        /// <summary>
        /// Background-worker body: tokenizes every *.txt file found in the input folder and writes
        /// the non-space tokens, each followed by a single space, to a file of the same name in
        /// the output folder.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Its Argument is cast to string[]: element 0 is the folder to scan,
        /// element 1 is the folder the results are written to.</param>
        private void BgWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            // Read every user selection on the UI thread in one synchronous round trip;
            // this handler itself runs on the worker thread.
            Encoding SelectedEncoding = null;
            var      SearchDepth      = SearchOption.TopDirectoryOnly;

            this.Invoke((MethodInvoker) delegate()
            {
                SelectedEncoding = Encoding.GetEncoding(EncodingDropdown.SelectedItem.ToString());

                if (ScanSubfolderCheckbox.Checked)
                {
                    SearchDepth = SearchOption.AllDirectories;
                }
            });

            //get the list of files
            string[] folders = (string[])e.Argument;
            var      files   = Directory.EnumerateFiles(folders[0], "*.txt", SearchDepth);

            try
            {
                string outputdir = folders[1];

                Directory.CreateDirectory(outputdir);

                foreach (string fileName in files)
                {
                    string Filename_Clean = Path.GetFileName(fileName);

                    //report what we're working on
                    FilenameLabel.Invoke((MethodInvoker) delegate
                    {
                        FilenameLabel.Text = "Analyzing: " + Filename_Clean;
                    });

                    string readText = File.ReadAllText(fileName, SelectedEncoding).ToLower();

                    // Walk the token sequence once; whitespace tokens are replaced by the
                    // single space appended after every kept token.
                    StringBuilder Builder = new StringBuilder();

                    foreach (var token in TwitterKoreanProcessorCS.Tokenize(readText))
                    {
                        if (token.Pos != KoreanPos.Space)
                        {
                            Builder.Append(token.Text).Append(' ');
                        }
                    }

                    using (StreamWriter fileout =
                               new StreamWriter(Path.Combine(outputdir, Filename_Clean), false, SelectedEncoding))
                    {
                        fileout.Write(Builder.ToString());
                    }
                }
            }
            catch (System.Exception ex)
            {
                // Best effort: the first failure stops the run. Show the reason so the
                // user can tell a bad path from an encoding or permission problem.
                MessageBox.Show("KoToken encountered a problem while trying to tokenize/write a file." + "\n\n" + ex.Message);
            }
        }