Ejemplo n.º 1
0
        /// <summary>
        /// Console walkthrough of the Hunspell and MyThes APIs: spell checking, suggestions,
        /// morphological analysis, stemming and thesaurus lookup against the en_us data files.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Hunspell is IDisposable; scope it so it is released even if a demo step throws.
            using (Hunspell hunspell = new Hunspell("en_us.aff", "en_us.dic"))
            {
                // Spell checking
                Console.WriteLine("Trying Spell Checking for the word 'Recommendation'");
                Console.WriteLine(hunspell.Spell("Recommendation"));

                // Suggestions for a deliberately misspelled word
                Console.WriteLine("\n\n");
                Console.WriteLine("Trying the suggesstions of the word 'Recommnedatio'");
                List<string> suggestions = hunspell.Suggest("Recommnedatio");
                foreach (string item in suggestions)
                {
                    Console.WriteLine("    --" + item);
                }

                // Morphological analysis
                Console.WriteLine("\n\n");
                Console.WriteLine("Analyze the word 'children'");
                List<string> morphs = hunspell.Analyze("children");
                foreach (string morph in morphs)
                {
                    Console.WriteLine("Morph is: " + morph);
                }

                // Stemming
                Console.WriteLine("\n\n");
                Console.WriteLine("Find the word stem of the word 'children'");
                List<string> stems = hunspell.Stem("children");
                foreach (string stem in stems)
                {
                    Console.WriteLine("Word Stem is: " + stem);
                }

                // Thesaurus / synonym functions
                Console.WriteLine("\n\n\nThesaurus/Synonym Functions");
                Console.WriteLine("¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯");

                MyThes thes = new MyThes("th_en_us_v2.dat");

                // Look up the same word the message announces, so the output matches
                // what is actually being demonstrated.
                Console.WriteLine("Get the synonyms of the plural word 'children'");
                ThesResult tr = thes.Lookup("children", hunspell);

                // Lookup returns null when neither the word nor any of its stems has an entry.
                if (tr == null)
                {
                    Console.WriteLine("No synonyms found");
                    return;
                }

                if (tr.IsGenerated)
                {
                    Console.WriteLine("Generated over stem (The original word form wasn't in the thesaurus)");
                }

                foreach (ThesMeaning meaning in tr.Meanings)
                {
                    Console.WriteLine();
                    Console.WriteLine("  Meaning: " + meaning.Description);
                    foreach (string synonym in meaning.Synonyms)
                    {
                        Console.WriteLine("    Synonym: " + synonym);
                    }
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Computes the stems of a word with a <see cref="Hunspell"/> instance borrowed from
        /// the internal stack for the duration of the call.
        /// </summary>
        /// <param name="word">
        /// The word to stem.
        /// </param>
        /// <returns>
        /// The list of word stems produced by the borrowed instance.
        /// </returns>
        /// <exception cref="ObjectDisposedException">This object has been disposed.</exception>
        /// <exception cref="InvalidOperationException">No Hunspell instances are loaded.</exception>
        public List <string> Stem(string word)
        {
            if (this.IsDisposed)
            {
                throw new ObjectDisposedException("SpellFactory");
            }

            if (this.hunspells == null)
            {
                throw new InvalidOperationException("Hunspell Dictionary isn't loaded");
            }

            Hunspell borrowed = null;
            List<string> stems;

            // Wait for a free slot before taking an instance off the stack.
            this.hunspellSemaphore.WaitOne();

            try
            {
                borrowed = this.hunspells.Pop();
                stems = borrowed.Stem(word);
            }
            finally
            {
                // Return the instance (if one was obtained), then free the slot.
                if (borrowed != null)
                {
                    this.hunspells.Push(borrowed);
                }

                this.hunspellSemaphore.Release();
            }

            return stems;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Replaces each word with its first Hunspell stem, using the en_us dictionary files.
 /// </summary>
 /// <param name="words">The words to normalize.</param>
 /// <returns>
 /// A new list with one entry per input word, in input order: the first stem of the word,
 /// or the word itself when no stem is returned for it.
 /// </returns>
 private static List<string> Normalize(List<string> words)
 {
     var newWords = new List<string>(words.Count);

     using (Hunspell hunspell = new Hunspell("en_us.aff", "en_us.dic"))
     {
         foreach (var word in words)
         {
             // FirstOrDefault() yields null when Stem() returns an empty list; keep the
             // original word in that case instead of putting a null into the result.
             newWords.Add(hunspell.Stem(word).FirstOrDefault() ?? word);
         }
     }

     return newWords;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Looks up a word in the thesaurus; when the word itself has no entry, looks up its
        /// stems instead and re-generates each synonym to match the form of the original word.
        /// </summary>
        /// <param name="word">
        /// The word to look up.
        /// </param>
        /// <param name="stemming">
        /// The <see cref="Hunspell"/> object used for stemming and generation.
        /// </param>
        /// <returns>
        /// The direct <see cref="ThesResult"/> if the word has an entry; otherwise a result built
        /// from the stems (constructed with its second argument set to true); or null when
        /// nothing was found.
        /// </returns>
        /// <exception cref="InvalidOperationException">The synonym table is empty.</exception>
        public ThesResult Lookup(string word, Hunspell stemming)
        {
            if (this.synonyms.Count == 0)
            {
                throw new InvalidOperationException("Thesaurus not loaded");
            }

            ThesResult found = this.Lookup(word);

            // Only fall back to stemming when the word has no entry of its own.
            if (found == null)
            {
                List <string> stems = stemming.Stem(word);

                if (stems != null && stems.Count != 0)
                {
                    var collected = new List <ThesMeaning>();

                    foreach (var stem in stems)
                    {
                        ThesResult stemEntry = this.Lookup(stem);

                        if (stemEntry == null)
                        {
                            continue;
                        }

                        foreach (var meaning in stemEntry.Meanings)
                        {
                            var inflected = new List <string>();

                            foreach (var synonym in meaning.Synonyms)
                            {
                                // Generate forms of the synonym modelled on the original word.
                                foreach (var form in stemming.Generate(synonym, word))
                                {
                                    inflected.Add(form);
                                }
                            }

                            // Meanings for which no form could be generated are dropped.
                            if (inflected.Count > 0)
                            {
                                collected.Add(new ThesMeaning(meaning.Description, inflected));
                            }
                        }
                    }

                    if (collected.Count > 0)
                    {
                        found = new ThesResult(collected, true);
                    }
                }
            }

            return found;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Computes the stems of a word using an instance obtained from HunspellsPop and
        /// handed back through HunspellsPush.
        /// </summary>
        /// <param name="word">
        /// The word to stem.
        /// </param>
        /// <returns>
        /// The list of word stems.
        /// </returns>
        public List <string> Stem(string word)
        {
            Hunspell borrowed = this.HunspellsPop();
            List<string> stems;

            try
            {
                stems = borrowed.Stem(word);
            }
            finally
            {
                // Always hand the instance back, even when stemming throws.
                this.HunspellsPush(borrowed);
            }

            return stems;
        }
        /// <summary>
        /// Splits the lower-cased text into tokens, cleans them, drops the ones rejected by
        /// isDullWord, and counts how often each remaining word occurs.
        /// </summary>
        /// <returns>
        /// One <see cref="Word"/> per distinct key with its occurrence count. A token is keyed
        /// by its stem only when the ru_RU dictionary returns exactly one stem; otherwise the
        /// token itself is the key.
        /// </returns>
        public List<Word> Parse()
        {
            var counts = new Dictionary<string, int>();

            using (var stemmer = new Hunspell("ru_RU.aff", "ru_RU.dic"))
            {
                var tokens = text.ToLower()
                                 .Split()
                                 .Select(t => t.CleanTrim())
                                 .Where(t => !isDullWord(t));

                foreach (var token in tokens)
                {
                    var stems = stemmer.Stem(token);

                    // Use the stem only when it is unambiguous.
                    var key = stems.Count == 1 ? stems[0] : token;

                    int seen;
                    counts.TryGetValue(key, out seen); // seen stays 0 for a new key
                    counts[key] = seen + 1;
                }
            }

            var result = new List<Word>(counts.Count);
            foreach (var pair in counts)
            {
                result.Add(new Word(pair.Key, pair.Value));
            }

            return result;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Resolves a word against the thesaurus, using stemming and generation as a fallback
        /// when the word itself has no entry.
        /// </summary>
        /// <param name="word">
        /// The word to look up.
        /// </param>
        /// <param name="stemming">
        /// The <see cref="Hunspell"/> object used for stemming and generation.
        /// </param>
        /// <returns>
        /// The <see cref="ThesResult"/> for the word, a result assembled from its stems, or
        /// null when neither yields any meaning.
        /// </returns>
        /// <exception cref="InvalidOperationException">The synonym table is empty.</exception>
        public ThesResult Lookup(string word, Hunspell stemming)
        {
            if (this.synonyms.Count == 0)
            {
                throw new InvalidOperationException("Thesaurus not loaded");
            }

            // A direct entry wins; no stemming is attempted in that case.
            ThesResult direct = this.Lookup(word);
            if (direct != null)
            {
                return direct;
            }

            List<string> stems = stemming.Stem(word);
            if (stems == null || stems.Count == 0)
            {
                return null;
            }

            var stemMeanings = new List<ThesMeaning>();
            foreach (var stem in stems)
            {
                ThesResult stemResult = this.Lookup(stem);
                if (stemResult == null)
                {
                    continue;
                }

                foreach (var meaning in stemResult.Meanings)
                {
                    var forms = new List<string>();
                    foreach (var synonym in meaning.Synonyms)
                    {
                        // Generate variants of the synonym using the original word as the model.
                        List<string> variants = stemming.Generate(synonym, word);
                        foreach (var variant in variants)
                        {
                            forms.Add(variant);
                        }
                    }

                    // Skip meanings for which nothing could be generated.
                    if (forms.Count == 0)
                    {
                        continue;
                    }

                    stemMeanings.Add(new ThesMeaning(meaning.Description, forms));
                }
            }

            return stemMeanings.Count > 0 ? new ThesResult(stemMeanings, true) : null;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Runs an external Gibbs LDA training pass over the keys of <paramref name="Age1"/> and
        /// replaces the table's contents with the words read back from model-final.twords.
        /// </summary>
        /// <remarks>
        /// Steps: write train_LDA.bat with parameters chosen from <paramref name="count"/>;
        /// write the cleaned keys to Data.txt; run the batch file and wait for it; clear the
        /// table and refill it from model-final.twords (word mapped to its weight, weights of
        /// repeated words are summed); optionally dump the result to xml_dic/{name}.txt.
        /// All files are addressed relative to the current working directory.
        /// </remarks>
        /// <param name="Age1">
        /// On entry, a table whose keys are the training texts and whose values are copied into
        /// an int array for averaging. On exit, a table of word to weight.
        /// </param>
        /// <param name="name">
        /// Base name of the summary file under xml_dic/; when null no summary is written.
        /// </param>
        /// <param name="count">
        /// Selects the training parameters and is the divisor of the second summary figure.
        /// </param>
        static void FillHash(Hashtable Age1, string name, int count)
        {
            // Training parameters by count. wordsPerTopic is passed as -twords and is also the
            // number of word lines the reader below expects after each header line.
            int iterations;
            int topicCount = count;
            int wordsPerTopic;

            if (count == 1)
            {
                iterations    = 500;
                topicCount    = 10;
                wordsPerTopic = 10;
            }
            else if (count < 5)
            {
                iterations    = 400;
                wordsPerTopic = 25;
            }
            else if (count <= 10)
            {
                iterations    = 300;
                wordsPerTopic = 15;
            }
            else if (count < 20)
            {
                iterations    = 200;
                wordsPerTopic = 10;
            }
            else
            {
                iterations    = 100;
                wordsPerTopic = 5;
            }

            using (StreamWriter lda = new StreamWriter("train_LDA.bat"))
            {
                lda.WriteLine("Gibbs_lda.exe -est -niters " + iterations + " -savestep " + (iterations + 1) + " -ntopics " + topicCount + " -twords " + wordsPerTopic + " -dfile data.txt");
            }

            // Summary figures are taken from the incoming table before it is cleared.
            // NOTE(review): Average() throws on an empty table and Divide throws when count is 0.
            int[] arr = new int[Age1.Count];
            Age1.Values.CopyTo(arr, 0);
            decimal AvgSenLen   = (decimal)arr.Average();
            decimal AvgSentencs = decimal.Divide(arr.Length, count);

            // Clean the keys first so the header line can state how many lines really follow;
            // entries of three characters or fewer after cleaning are not written.
            List <string> documents = new List <string>();
            foreach (DictionaryEntry str in Age1)
            {
                string g = str.Key.ToString().Trim('\n', ' ', '\t', '?', '@', '%', '.');
                g = Regex.Replace(g, @"[^\u0000-\u007F]", string.Empty);
                if (g.Length > 3)
                {
                    documents.Add(g);
                }
            }

            using (StreamWriter sw = new StreamWriter("Data.txt"))
            {
                sw.WriteLine(documents.Count.ToString());
                foreach (string document in documents)
                {
                    sw.WriteLine(document);
                }
            }

            using (Process p = new Process())
            {
                p.StartInfo.UseShellExecute        = true;
                p.StartInfo.RedirectStandardOutput = false;
                p.StartInfo.WindowStyle            = ProcessWindowStyle.Hidden;
                p.StartInfo.FileName = "train_LDA";
                p.Start();
                p.WaitForExit();
            }

            Age1.Clear();

            // The weights come from the tool's output file, not from user input, so they are
            // parsed independently of the machine's regional settings.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            using (StreamReader sr = new StreamReader("model-final.twords"))
            {
                int ind = 0;

                while (!sr.EndOfStream)
                {
                    if (ind == 0 || ind == wordsPerTopic)
                    {
                        // Skip the header line that precedes each group of words.
                        sr.ReadLine();
                        ind = 0;
                    }

                    string line = sr.ReadLine();
                    if (line == null)
                    {
                        // The header was the last line of the file.
                        break;
                    }

                    // The word is field 0 and its weight field 3 of the space-split line.
                    string[] ww = line.Trim().Split(' ');

                    try
                    {
                        List <string> ab = hunspl.Stem(ww[0].Trim('*', ':', ',', '.', '{', '}', '(', ')', ',', ';', '?', '!', ' ', '-', '[', ']', ' ', '\t', '\n', '\r').ToLower());
                        if (ab.Count > 1)
                        {
                            ww[0] = ab[1];
                        }
                        else if (ab.Count == 1)
                        {
                            ww[0] = ab[0];
                        }
                    }
                    catch
                    {
                        // Best effort: the word stays unstemmed and the stemmer is re-created.
                        // NOTE(review): presumably this also covers hunspl not being set yet - confirm.
                        hunspl = new NHunspell.Hunspell("en_US.aff", "en_US.dic");
                    }

                    if (!Age1.ContainsKey(ww[0]))
                    {
                        Age1.Add(ww[0], ww[3]);
                    }
                    else
                    {
                        // The stored value is the raw text on first sight and a decimal once summed.
                        object existing = Age1[ww[0]];
                        decimal w = existing is decimal
                            ? (decimal)existing
                            : decimal.Parse(existing.ToString(), invariant);
                        Age1[ww[0]] = w + decimal.Parse(ww[3], invariant);
                    }

                    ind++;
                }
            }

            if (name != null)
            {
                using (StreamWriter sw1 = new StreamWriter("xml_dic/" + name + ".txt"))
                {
                    sw1.WriteLine(AvgSenLen.ToString() + ":" + AvgSentencs.ToString());

                    foreach (DictionaryEntry ent in Age1)
                    {
                        sw1.WriteLine(ent.Key.ToString() + ":" + ent.Value.ToString());
                    }
                }
            }
        }
        /// <summary>
        /// Collects the lower-cased thesaurus synonyms for the first Hunspell stem of a word.
        /// </summary>
        /// <param name="word">The word whose synonyms are wanted.</param>
        /// <returns>
        /// Lower-cased synonyms of the first stem, excluding any that match the stem or the
        /// original word once lower-cased; empty when there is no stem or no thesaurus entry.
        /// </returns>
        private static List<string> Synonyms(string word)
        {
            var synonyms = new List<string>();
            var thesaurus = new MyThes(DatFilePath);

            using (var hunspell = new Hunspell(AffFilePath, DictionaryFilePath))
            {
                // FirstOrDefault() is null for an empty stem list, which the check below covers.
                var stem = hunspell.Stem(word).FirstOrDefault();
                if (string.IsNullOrEmpty(stem))
                {
                    return synonyms;
                }

                var entry = thesaurus.Lookup(stem);
                if (entry == null || entry.Meanings == null)
                {
                    return synonyms;
                }

                foreach (var meaning in entry.Meanings)
                {
                    var candidates = meaning.Synonyms.Where(
                        s => s.ToLower() != stem.ToLower() && s.ToLower() != word.ToLower());

                    foreach (var candidate in candidates)
                    {
                        synonyms.Add(candidate.ToLower());
                    }
                }
            }

            return synonyms;
        }