/// <summary>
/// Preprocesses a text file line by line and writes the processed lines to another file.
/// </summary>
/// <remarks>
/// Each line is lower-cased and then passed, in order, through <c>XoaKyTu</c>,
/// <c>XoaTrang</c>, <c>BoStopWords</c> and <c>PorterStemming</c>. Judging by their names these
/// presumably strip characters, collapse whitespace, drop stop words and stem the remaining
/// words; confirm against their definitions. The last output line is written without a
/// trailing line terminator.
/// </remarks>
/// <param name="input">Path of the text file to read.</param>
/// <param name="output">Path of the file to write; it is created or overwritten.</param>
public static void tienXuLy(string input, string output)
{
    List<string> texts = new List<string>();

    // Read every line first. The reader is disposed before the writer is opened,
    // so input and output may refer to the same file.
    using (StreamReader filein = new StreamReader(input))
    {
        string line;
        while ((line = filein.ReadLine()) != null)
        {
            texts.Add(line);
        }
    }

    using (StreamWriter fileout = new StreamWriter(output))
    {
        int lastIndex = texts.Count - 1;
        for (int i = 0; i <= lastIndex; i++)
        {
            // Invariant casing keeps the result independent of the machine's current culture
            // (e.g. Turkish dotted/dotless I), which matters for an English stemmer.
            string processed = texts[i].ToLowerInvariant();
            processed = XoaKyTu(processed);
            processed = XoaTrang(processed);
            processed = BoStopWords(processed);
            processed = PorterStemming(processed);

            if (i == lastIndex)
            {
                // No line terminator after the final line.
                fileout.Write(processed);
            }
            else
            {
                fileout.WriteLine(processed);
            }
        }
    }
}
/// <summary>
/// Stems every space-separated token of <paramref name="str"/> with
/// <c>EnglishPorter2Stemmer</c> and joins the results back with single spaces.
/// </summary>
/// <remarks>
/// The input is split on the space character only, so consecutive spaces yield empty tokens
/// that are passed to the stemmer like any other token.
/// NOTE(review): another method with this exact signature appears in this file; if both live in
/// the same class the file will not compile, so one of them should be removed.
/// </remarks>
/// <param name="str">Line of text to stem; must not be null.</param>
/// <returns>The stemmed tokens separated by single spaces.</returns>
public static string PorterStemming(string str)
{
    EnglishPorter2Stemmer stemword = new EnglishPorter2Stemmer();
    string[] words = str.Split(' ');

    for (int i = 0; i < words.Length; i++)
    {
        words[i] = stemword.Stem(words[i]).Value;
    }

    // string.Join builds the result in one pass instead of re-copying the
    // accumulated string on every iteration.
    return string.Join(" ", words);
}
/// <summary>
/// Stems every space-separated token of <paramref name="str"/> with
/// <c>EnglishPorter2Stemmer</c> and returns the stems separated by single spaces.
/// </summary>
/// <remarks>
/// The input is split on the space character only, so consecutive spaces yield empty tokens
/// that are passed to the stemmer like any other token.
/// NOTE(review): another method with this exact signature appears in this file; if both live in
/// the same class the file will not compile, so one of them should be removed.
/// </remarks>
/// <param name="str">Line of text to stem; must not be null.</param>
/// <returns>The stemmed tokens separated by single spaces.</returns>
public static string PorterStemming(string str)
{
    EnglishPorter2Stemmer stemword = new EnglishPorter2Stemmer();
    string[] tokens = str.Split(' ');

    // The output is usually close to the input in size, so presize the buffer.
    StringBuilder result = new StringBuilder(str.Length);

    for (int i = 0; i < tokens.Length; i++)
    {
        if (i > 0)
        {
            // Separator goes between tokens only, never after the last one.
            result.Append(' ');
        }

        result.Append(stemword.Stem(tokens[i]).Value);
    }

    return result.ToString();
}