Example #1
0
        /// <summary>
        /// Preprocesses a text file line by line and writes the result to another file.
        /// </summary>
        /// <remarks>
        /// Each input line is lower-cased and then passed, in this order, through
        /// <c>XoaKyTu</c>, <c>XoaTrang</c>, <c>BoStopWords</c> and <c>PorterStemming</c>.
        /// NOTE(review): judging by their names these presumably strip characters,
        /// collapse whitespace, drop stop words and stem the remaining words; their
        /// bodies are not visible here, so confirm against their definitions.
        /// The processed lines are written separated by line terminators, with no
        /// terminator after the last line.
        /// </remarks>
        /// <param name="input">Path of the text file to read.</param>
        /// <param name="output">Path of the file to create or overwrite with the processed text.</param>
        public static void tienXuLy(string input, string output)
        {
            // Read the whole input up front; the reader is fully closed before the
            // output file is opened for writing.
            string[] lines = File.ReadAllLines(input);

            using (StreamWriter fileout = new StreamWriter(output))
            {
                for (int i = 0; i < lines.Length; i++)
                {
                    string text = lines[i].ToLower();
                    text = XoaKyTu(text);
                    text = XoaTrang(text);
                    text = BoStopWords(text);
                    text = PorterStemming(text);

                    // Emit the terminator *before* every line except the first, so
                    // the file never ends with a trailing newline.
                    if (i > 0)
                    {
                        fileout.WriteLine();
                    }

                    fileout.Write(text);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Stems every space-separated token of <paramref name="str"/> and returns
        /// the stemmed tokens joined back together with single spaces.
        /// </summary>
        /// <param name="str">
        /// The text to stem. It is split on the space character only, so consecutive
        /// spaces yield empty tokens that are passed to the stemmer as-is.
        /// </param>
        /// <returns>
        /// The stemmed tokens separated by a single space, with no trailing space.
        /// </returns>
        public static string PorterStemming(string str)
        {
            EnglishPorter2Stemmer stemword = new EnglishPorter2Stemmer();

            string[] temp = str.Split(' ');
            for (int i = 0; i < temp.Length; i++)
            {
                temp[i] = stemword.Stem(temp[i]).Value;
            }

            // string.Join inserts the separator only between elements, which replaces
            // the manual "last element" check and the repeated string concatenation.
            return string.Join(" ", temp);
        }
Example #3
0
        /// <summary>
        /// Stems every space-separated token of <paramref name="str"/> and returns
        /// the stemmed tokens joined back together with single spaces.
        /// </summary>
        /// <param name="str">
        /// The text to stem. It is split on the space character only, so consecutive
        /// spaces yield empty tokens that are passed to the stemmer as-is.
        /// </param>
        /// <returns>
        /// The stemmed tokens separated by a single space, with no trailing space.
        /// </returns>
        public static string PorterStemming(string str)
        {
            EnglishPorter2Stemmer stemword = new EnglishPorter2Stemmer();
            StringBuilder templine = new StringBuilder();

            string[] temp = str.Split(' ');
            for (int i = 0; i < temp.Length; i++)
            {
                // Put the separator in front of every token but the first; this
                // avoids both a trailing space and a temporary concatenated string.
                if (i > 0)
                {
                    templine.Append(' ');
                }

                templine.Append(stemword.Stem(temp[i]).Value);
            }

            return templine.ToString();
        }