예제 #1
0
파일: dicts.cs 프로젝트: reactxx/rewise
 // ********************** STEMMER, not used (abandoned in favour of SqlServer)
 // Uses the Hunspell stemmer. It returns either the base word from the .DIC file or the word itself.
 // For Czech, it does not work for some verbs ("slovesa").
 /// <summary>
 /// Runs the Hunspell stemmer over every word of every dictionary returned by <c>files()</c>
 /// and reports, on the console, which dictionaries could not be processed.
 /// </summary>
 /// <remarks>
 /// For each entry, the file at <c>Item1</c> is read as the word list: the first line is skipped
 /// (presumably the Hunspell word-count header - confirm against the .dic files in use), lines
 /// that are empty or do not start with a letter are dropped, and each remaining line is cut at
 /// the first '/'. Every word is then passed to <c>Hunspell.Stemmer.Stem</c>; the stems themselves
 /// are not used, the call only checks that stemming completes without throwing.
 /// A failure while building the dictionary or stemming is logged (file path plus exception type
 /// and message) and processing continues with the next entry. Failures while reading the word
 /// list or opening the files are not caught here and propagate to the caller.
 /// When all entries are processed, "DONE" is printed and the method blocks on
 /// <c>Console.ReadKey()</c>.
 /// </remarks>
 public static void init()
 {
     // Debugging note: the original kept commented-out single-dictionary variants here,
     // e.g. file("cs_cz"), file("de"), file("br_FR"), as alternatives to iterating files().
     foreach (var data in files())
     {
         // Item2 is passed both to encoding.getEncoding and to File.OpenRead below, so it is
         // presumably the .aff path from which the text encoding is derived - TODO confirm.
         var encod = encoding.getEncoding(data.Item2);
         var lines = File.ReadAllLines(data.Item1, encod)
             .Skip(1)
             .Where(l => !string.IsNullOrEmpty(l) && char.IsLetter(l[0]))
             .Select(l => l.Split('/')[0])
             .ToArray();

         using (var dic = File.OpenRead(data.Item1))
         using (var aff = File.OpenRead(data.Item2))
         {
             try
             {
                 Hunspell.Dictionary dict = new Hunspell.Dictionary(aff, dic);
                 Hunspell.Stemmer stemmer = new Hunspell.Stemmer(dict);
                 foreach (var w in lines)
                 {
                     // Result intentionally ignored: only an exception is of interest here.
                     stemmer.Stem(w);
                 }
             }
             catch (Exception exp)
             {
                 // Best effort: keep going with the next dictionary, but do not lose the
                 // reason for the failure (previously only the file path was printed).
                 Console.WriteLine(data.Item1);
                 Console.WriteLine("    " + exp.GetType().Name + ": " + exp.Message);
             }
         }
     }
     Console.WriteLine("DONE");
     Console.ReadKey();
 }