// ********************** STEMMER, not used in favour of SqlServer
// Uses the Hunspell stemmer. For each word it returns either the base word
// from the .DIC file or the word itself.
// For Czech, it does not work for some verbs.
//
// Manual smoke test: for every dictionary returned by files(), loads the
// dictionary and runs the stemmer over every headword of the word-list file.
// A dictionary that fails is reported on the console and skipped, so the
// remaining dictionaries are still processed. When finished, prints "DONE"
// and waits for a key press.
public static void init()
{
    // To run a single dictionary instead of all of them:
    //var data = file("cs_cz");
    //var data = file("de");
    //var data = file("br_FR");
    foreach (var data in files())
    {
        // The text encoding is resolved from data.Item2 (the file that is
        // later passed to Hunspell as the affix stream).
        var encod = encoding.getEncoding(data.Item2);

        // Skip the first line of the word list, keep only non-empty lines
        // that start with a letter, and drop everything from the first '/'
        // onwards so only the headword remains.
        var lines = File.ReadAllLines(data.Item1, encod)
            .Skip(1)
            .Where(l => !string.IsNullOrEmpty(l) && char.IsLetter(l[0]))
            .Select(l => l.Split('/')[0])
            .ToArray();

        using (var dic = File.OpenRead(data.Item1))
        using (var aff = File.OpenRead(data.Item2))
        {
            try
            {
                Hunspell.Dictionary dict = new Hunspell.Dictionary(aff, dic);
                Hunspell.Stemmer stemmer = new Hunspell.Stemmer(dict);

                // The stems themselves are not used; the loop only checks
                // that stemming every headword completes without throwing.
                foreach (var w in lines)
                {
                    stemmer.Stem(w);
                }
            }
            catch (Exception exp)
            {
                // Best effort: report which dictionary failed and why, then
                // continue with the next one instead of aborting the run.
                Console.WriteLine(data.Item1);
                Console.WriteLine("  " + exp.GetType().Name + ": " + exp.Message);
                //throw new Exception(data.Item1, exp);
            }
        }
    }

    Console.WriteLine("DONE");
    Console.ReadKey();
}