/// <summary>
/// Console demo that exercises spell checking, suggestions, morphological
/// analysis, stemming and thesaurus lookup against the en_us dictionary files.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Hunspell is IDisposable; release it deterministically when the demo ends.
    using (Hunspell hunspell = new Hunspell("en_us.aff", "en_us.dic"))
    {
        // Spell checking
        Console.WriteLine("Trying Spell Checking for the word 'Recommendation'");
        Console.WriteLine(hunspell.Spell("Recommendation"));

        // Suggestions for a misspelled word
        Console.WriteLine("\n\n");
        Console.WriteLine("Trying the suggesstions of the word 'Recommnedatio'");
        List<string> suggestions = hunspell.Suggest("Recommnedatio");
        foreach (string item in suggestions)
        {
            Console.WriteLine(" --" + item);
        }

        // Morphological analysis
        Console.WriteLine("\n\n");
        Console.WriteLine("Analyze the word 'children'");
        List<string> morphs = hunspell.Analyze("children");
        foreach (string morph in morphs)
        {
            Console.WriteLine("Morph is: " + morph);
        }

        // Stemming
        Console.WriteLine("\n\n");
        Console.WriteLine("Find the word stem of the word 'children'");
        List<string> stems = hunspell.Stem("children");
        foreach (string stem in stems)
        {
            Console.WriteLine("Word Stem is: " + stem);
        }

        // Thesaurus / synonym functions
        Console.WriteLine("\n\n\nThesaurus/Synonym Functions");
        Console.WriteLine("¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯");

        MyThes thes = new MyThes("th_en_us_v2.dat");

        // Look up the word the message announces (the original looked up "how").
        Console.WriteLine("Get the synonyms of the plural word 'children'");
        ThesResult tr = thes.Lookup("children", hunspell);

        // Lookup returns null when neither the word nor any of its stems
        // yields synonyms, so guard before dereferencing the result.
        if (tr == null)
        {
            Console.WriteLine("No synonyms found");
            return;
        }

        if (tr.IsGenerated)
        {
            Console.WriteLine("Generated over stem (The original word form wasn't in the thesaurus)");
        }

        foreach (ThesMeaning meaning in tr.Meanings)
        {
            Console.WriteLine();
            Console.WriteLine(" Meaning: " + meaning.Description);
            foreach (string synonym in meaning.Synonyms)
            {
                Console.WriteLine(" Synonym: " + synonym);
            }
        }
    }
}
/// <summary>
/// Returns the stems of a word, using a <see cref="Hunspell"/> instance
/// taken from <c>hunspells</c> for the duration of the call.
/// </summary>
/// <param name="word">
/// The word to stem.
/// </param>
/// <returns>
/// The list of word stems.
/// </returns>
/// <exception cref="ObjectDisposedException">This object has been disposed.</exception>
/// <exception cref="InvalidOperationException">No Hunspell dictionary is loaded.</exception>
public List<string> Stem(string word)
{
    if (this.IsDisposed)
    {
        throw new ObjectDisposedException("SpellFactory");
    }

    if (this.hunspells == null)
    {
        throw new InvalidOperationException("Hunspell Dictionary isn't loaded");
    }

    // Acquire a slot before touching the instance collection; the matching
    // Release happens in the finally block below.
    this.hunspellSemaphore.WaitOne();

    Hunspell borrowed = null;
    try
    {
        borrowed = this.hunspells.Pop();
        List<string> stems = borrowed.Stem(word);
        return stems;
    }
    finally
    {
        // Hand the instance back only if the Pop actually succeeded.
        if (borrowed != null)
        {
            this.hunspells.Push(borrowed);
        }

        this.hunspellSemaphore.Release();
    }
}
/// <summary>
/// Replaces each word with the first stem Hunspell reports for it, using the
/// en_us dictionary files.
/// </summary>
/// <param name="words">The words to normalize.</param>
/// <returns>
/// A new list with one entry per input word, in the same order. A word for
/// which no stem is reported is kept unchanged.
/// </returns>
private static List<string> Normalize(List<string> words)
{
    var newWords = new List<string>(words.Count);

    using (Hunspell hunspell = new Hunspell("en_us.aff", "en_us.dic"))
    {
        foreach (var word in words)
        {
            // FirstOrDefault yields null when the stem list is empty; fall back
            // to the original word so the result never contains null entries.
            newWords.Add(hunspell.Stem(word).FirstOrDefault() ?? word);
        }
    }

    return newWords;
}
/// <summary>
/// Looks up the synonyms of a word. If the word itself has no entry, its
/// stems are looked up instead and the synonyms found are passed through
/// <c>stemming.Generate</c> together with the original word.
/// </summary>
/// <param name="word">
/// The word.
/// </param>
/// <param name="stemming">
/// The <see cref="Hunspell"/> object for stemming and generation.
/// </param>
/// <returns>
/// The <see cref="ThesResult"/>, or <c>null</c> when nothing was found.
/// </returns>
/// <exception cref="InvalidOperationException">The thesaurus holds no entries.</exception>
public ThesResult Lookup(string word, Hunspell stemming)
{
    if (this.synonyms.Count == 0)
    {
        throw new InvalidOperationException("Thesaurus not loaded");
    }

    // A direct hit wins; stemming is only a fallback.
    ThesResult direct = this.Lookup(word);
    if (direct != null)
    {
        return direct;
    }

    List<string> stems = stemming.Stem(word);
    if (stems == null || stems.Count == 0)
    {
        return null;
    }

    var meanings = new List<ThesMeaning>();
    foreach (var stem in stems)
    {
        ThesResult stemResult = this.Lookup(stem);
        if (stemResult == null)
        {
            continue;
        }

        foreach (var meaning in stemResult.Meanings)
        {
            var generated = new List<string>();
            foreach (var synonym in meaning.Synonyms)
            {
                foreach (var form in stemming.Generate(synonym, word))
                {
                    generated.Add(form);
                }
            }

            // Meanings for which nothing could be generated are dropped.
            if (generated.Count == 0)
            {
                continue;
            }

            meanings.Add(new ThesMeaning(meaning.Description, generated));
        }
    }

    return meanings.Count > 0 ? new ThesResult(meanings, true) : null;
}
/// <summary>
/// Returns the stems of a word, using a <see cref="Hunspell"/> instance
/// obtained from <c>HunspellsPop</c> and handed back via <c>HunspellsPush</c>.
/// </summary>
/// <param name="word">
/// The word to stem.
/// </param>
/// <returns>
/// The list of word stems.
/// </returns>
public List<string> Stem(string word)
{
    Hunspell borrowed = this.HunspellsPop();
    try
    {
        List<string> stems = borrowed.Stem(word);
        return stems;
    }
    finally
    {
        // Always hand the instance back, even when stemming throws.
        this.HunspellsPush(borrowed);
    }
}
/// <summary>
/// Splits <c>text</c> into lower-cased, trimmed tokens, skips tokens rejected
/// by <c>isDullWord</c>, and counts occurrences per word using the ru_RU
/// dictionary files for stemming.
/// </summary>
/// <returns>
/// One <see cref="Word"/> per distinct key together with its occurrence count.
/// </returns>
public List<Word> Parse()
{
    var counts = new Dictionary<string, int>();

    using (var hunspell = new Hunspell("ru_RU.aff", "ru_RU.dic"))
    {
        var tokens = text.ToLower()
            .Split()
            .Select(w => w.CleanTrim())
            .Where(w => !isDullWord(w));

        foreach (var token in tokens)
        {
            var stems = hunspell.Stem(token);

            // Only a single, unambiguous stem replaces the token; with zero or
            // several stems the token itself is counted.
            var key = stems.Count == 1 ? stems[0] : token;

            int seen;
            if (counts.TryGetValue(key, out seen))
            {
                counts[key] = seen + 1;
            }
            else
            {
                counts.Add(key, 1);
            }
        }
    }

    return counts.Select(pair => new Word(pair.Key, pair.Value)).ToList();
}
/// <summary>
/// Looks up the synonyms of a word, falling back to the word's stems (with
/// synonym generation) when the word itself has no thesaurus entry.
/// </summary>
/// <param name="word">
/// The word.
/// </param>
/// <param name="stemming">
/// The <see cref="Hunspell"/> object for stemming and generation.
/// </param>
/// <returns>
/// The <see cref="ThesResult"/>, or <c>null</c> when nothing was found.
/// </returns>
/// <exception cref="InvalidOperationException">The thesaurus holds no entries.</exception>
public ThesResult Lookup(string word, Hunspell stemming)
{
    if (this.synonyms.Count == 0)
    {
        throw new InvalidOperationException("Thesaurus not loaded");
    }

    ThesResult exactMatch = this.Lookup(word);
    if (exactMatch != null)
    {
        return exactMatch;
    }

    List<string> wordStems = stemming.Stem(word);
    if (wordStems == null || wordStems.Count == 0)
    {
        return null;
    }

    var collected = new List<ThesMeaning>();
    foreach (var wordStem in wordStems)
    {
        ThesResult stemEntry = this.Lookup(wordStem);
        if (stemEntry != null)
        {
            foreach (var meaning in stemEntry.Meanings)
            {
                List<string> forms = CollectGeneratedSynonyms(meaning, word, stemming);
                if (forms.Count > 0)
                {
                    collected.Add(new ThesMeaning(meaning.Description, forms));
                }
            }
        }
    }

    if (collected.Count == 0)
    {
        return null;
    }

    return new ThesResult(collected, true);
}

/// <summary>
/// Runs every synonym of a meaning through <c>stemming.Generate</c> with the
/// original word and gathers all generated forms in order.
/// </summary>
private static List<string> CollectGeneratedSynonyms(ThesMeaning meaning, string word, Hunspell stemming)
{
    var forms = new List<string>();
    foreach (var synonym in meaning.Synonyms)
    {
        List<string> generatedForms = stemming.Generate(synonym, word);
        foreach (var generatedForm in generatedForms)
        {
            forms.Add(generatedForm);
        }
    }

    return forms;
}
/// <summary>
/// Runs an external Gibbs LDA training pass over the keys of
/// <paramref name="Age1"/> and replaces the table's contents with the words
/// read back from <c>model-final.twords</c>, stemmed and with their weights
/// summed per word.
/// </summary>
/// <remarks>
/// Steps: write <c>train_LDA.bat</c> with parameters chosen from
/// <paramref name="count"/>; compute two averages from the table's values;
/// write the cleaned keys to <c>Data.txt</c>; run the batch file and wait for
/// it; clear the table and refill it from <c>model-final.twords</c>; optionally
/// write the result to <c>xml_dic/&lt;name&gt;.txt</c>.
/// Uses the <c>hunspl</c> field, which is declared outside this block.
/// </remarks>
/// <param name="Age1">
/// Table whose keys are the input texts and whose values are copied into an
/// <c>int[]</c> (so they must be ints). Cleared and refilled by this method.
/// </param>
/// <param name="name">Output file name (without extension), or <c>null</c> to skip writing the result file.</param>
/// <param name="count">Used as the <c>-ntopics</c> value (except for 1) and as the divisor of the second average.</param>
static void FillHash(Hashtable Age1, string name, int count)
{
    // Number of word lines per topic block in model-final.twords; it mirrors
    // the -twords value written to the batch file below.
    int topics = 10;

    using (StreamWriter lda = new StreamWriter("train_LDA.bat"))
    {
        if (count == 1)
        {
            lda.WriteLine("Gibbs_lda.exe -est -niters 500 -savestep 501 -ntopics 10 -twords 10 -dfile data.txt");
            topics = 10;
        }
        else if (count < 5)
        {
            lda.WriteLine("Gibbs_lda.exe -est -niters 400 -savestep 401 -ntopics " + count.ToString() + " -twords 25 -dfile data.txt");
            topics = 25;
        }
        else if (count <= 10)
        {
            lda.WriteLine("Gibbs_lda.exe -est -niters 300 -savestep 301 -ntopics " + count.ToString() + " -twords 15 -dfile data.txt");
            topics = 15;
        }
        else if (count < 20)
        {
            lda.WriteLine("Gibbs_lda.exe -est -niters 200 -savestep 201 -ntopics " + count.ToString() + " -twords 10 -dfile data.txt");
            topics = 10;
        }
        else
        {
            lda.WriteLine("Gibbs_lda.exe -est -niters 100 -savestep 101 -ntopics " + count.ToString() + " -twords 5 -dfile data.txt");
            topics = 5;
        }
    }

    // Statistics are taken before the table is cleared further down.
    int[] arr = new int[Age1.Count];
    Age1.Values.CopyTo(arr, 0);
    decimal AvgSenLen = (decimal)arr.Average();
    decimal AvgSentencs = decimal.Divide(arr.Length, count);

    // Clean the keys first so the header line can state how many documents
    // are really written (keys of 3 characters or fewer are dropped).
    List<string> documents = new List<string>();
    foreach (DictionaryEntry str in Age1)
    {
        string g = str.Key.ToString().Trim('\n', ' ', '\t', '?', '@', '%', '.');
        g = Regex.Replace(g, @"[^\u0000-\u007F]", string.Empty);
        if (g.Length > 3)
        {
            documents.Add(g);
        }
    }

    using (StreamWriter sw = new StreamWriter("Data.txt"))
    {
        sw.WriteLine(documents.Count.ToString());
        foreach (string document in documents)
        {
            sw.WriteLine(document);
        }
    }

    using (Process p = new Process())
    {
        p.StartInfo.UseShellExecute = true;
        p.StartInfo.RedirectStandardOutput = false;
        p.StartInfo.WindowStyle = ProcessWindowStyle.Hidden;
        p.StartInfo.FileName = "train_LDA";
        p.Start();
        p.WaitForExit();
    }

    Age1.Clear();

    using (StreamReader sr = new StreamReader("model-final.twords"))
    {
        int ind = 0;
        while (!sr.EndOfStream)
        {
            if (ind == 0 || ind == topics)
            {
                // Skip the line that precedes each block of word lines
                // (presumably the topic header).
                sr.ReadLine();
                ind = 0;
            }

            string line = sr.ReadLine();
            if (line == null)
            {
                // The skipped line was the last one in the file.
                break;
            }

            // ww[0] is the word, ww[3] its weight (the code relies on the
            // separator producing exactly this split).
            string[] ww = line.Trim().Split(' ');

            try
            {
                List<string> ab = hunspl.Stem(ww[0].Trim('*', ':', ',', '.', '{', '}', '(', ')', ',', ';', '?', '!', ' ', '-', '[', ']', ' ', '\t', '\n', '\r').ToLower());
                if (ab.Count > 1)
                {
                    ww[0] = ab[1];
                }
                else if (ab.Count == 1)
                {
                    ww[0] = ab[0];
                }
            }
            catch
            {
                // Best-effort, as in the original: any stemming failure
                // (including an uninitialised hunspl) re-creates the stemmer
                // and the current word is kept unstemmed.
                hunspl = new NHunspell.Hunspell("en_US.aff", "en_US.dic");
            }

            if (!Age1.ContainsKey(ww[0]))
            {
                Age1.Add(ww[0], ww[3]);
            }
            else
            {
                decimal w = decimal.Parse(Age1[ww[0]].ToString());
                Age1[ww[0]] = w + decimal.Parse(ww[3]);
            }

            ind++;
        }
    }

    if (name != null)
    {
        using (StreamWriter sw1 = new StreamWriter("xml_dic/" + name + ".txt"))
        {
            sw1.WriteLine(AvgSenLen.ToString() + ":" + AvgSentencs.ToString());
            foreach (DictionaryEntry ent in Age1)
            {
                sw1.WriteLine(ent.Key.ToString() + ":" + ent.Value.ToString());
            }
        }
    }
}
/// <summary>
/// Collects the lower-cased thesaurus synonyms of the first stem of a word,
/// leaving out entries that equal the stem or the word itself once lower-cased.
/// </summary>
/// <param name="word">The word to find synonyms for.</param>
/// <returns>
/// The synonyms found; empty when the word has no stem or the stem has no
/// thesaurus meanings.
/// </returns>
private static List<string> Synonyms(string word)
{
    var result = new List<string>();
    var thes = new MyThes(DatFilePath);

    using (var hunspell = new Hunspell(AffFilePath, DictionaryFilePath))
    {
        // An empty stem list gives null here, which the guard below also covers.
        var stemmedWord = hunspell.Stem(word).FirstOrDefault();
        if (string.IsNullOrEmpty(stemmedWord))
        {
            return result;
        }

        var thesaurusResult = thes.Lookup(stemmedWord);
        if (thesaurusResult == null || thesaurusResult.Meanings == null || !thesaurusResult.Meanings.Any())
        {
            return result;
        }

        foreach (var meaning in thesaurusResult.Meanings)
        {
            foreach (var synonym in meaning.Synonyms)
            {
                var lowered = synonym.ToLower();
                if (lowered != stemmedWord.ToLower() && lowered != word.ToLower())
                {
                    result.Add(lowered);
                }
            }
        }
    }

    return result;
}