/// <summary>
/// Downloads the dicionarioinformal.com.br page for <paramref name="word"/>, hands the parsed
/// document to <c>ClassifyWord</c> and <c>GetMoreWords</c>, and retries up to three times when
/// any step throws.
/// </summary>
/// <param name="word">
/// Word to look up. When it already has a grammatical-class dictionary no download is made.
/// </param>
/// <returns>
/// The word's URL on the site when the lookup succeeded or was skipped; otherwise
/// <c>"Error: "</c> followed by the token once every attempt has failed.
/// </returns>
private static string SearchWord(Word word)
{
    int attempts = 3;
    while (attempts > 0)
    {
        try
        {
            // Already classified (possibly by an earlier, partially failed attempt): nothing to fetch.
            if (word.GrammaticalClass != null)
            {
                break;
            }

            // Escape the token so reserved characters ('/', '?', '#', ...) cannot alter the request path.
            string url = string.Format(
                "http://www.dicionarioinformal.com.br/{0}/",
                Uri.EscapeDataString(word.Token));

            string htmlString;
            // WebClient is IDisposable; release it as soon as the page has been downloaded.
            using (var client = new System.Net.WebClient())
            {
                htmlString = client.DownloadString(url);
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(htmlString);
            ClassifyWord(word, doc);
            GetMoreWords(doc);
            break;
        }
        catch (Exception)
        {
            // Best-effort lookup: any failure just consumes one attempt; exhaustion is
            // reported to the caller through the "Error: ..." return value below.
            attempts--;
        }
    }

    // Counted once per call, whether the lookup succeeded, was skipped, or failed.
    numberOfWordsProccessed++;

    if (attempts <= 0)
    {
        return "Error: " + word.Token;
    }

    return string.Format("http://www.dicionarioinformal.com.br/{0}", word.Token);
}
/// <summary>
/// Reads the text nodes of the first <c>span</c> with class <c>textoDefinicao</c> in
/// <paramref name="doc"/>, derives grammatical information from each of them, stores it on
/// <paramref name="word"/>, and then registers the word in <c>wordDictionary</c> (merging the
/// grammatical classes into the existing entry when the token is already present).
/// </summary>
/// <param name="word">Word being classified; its <c>GrammaticalClass</c> dictionary is created on demand.</param>
/// <param name="doc">Parsed HTML page for the word.</param>
private static void ClassifyWord(Word word, HtmlDocument doc)
{
    var span = doc.DocumentNode.SelectSingleNode("//span[@class='textoDefinicao']");
    if (span != null)
    {
        List<HtmlNode> textNodes = span.ChildNodes.Where(node => node.Name == "#text").ToList();
        foreach (var node in textNodes)
        {
            Grammar grammar = ParseGrammarFromText(node.InnerText);
            if (grammar == null)
            {
                continue;
            }

            if (word.GrammaticalClass == null)
            {
                word.GrammaticalClass = new Dictionary<GrammaticalClassEnum, Grammar>();
            }

            // The first occurrence of a grammatical class wins; later text nodes do not overwrite it.
            if (!word.GrammaticalClass.ContainsKey(grammar.GrammaticalClass))
            {
                word.GrammaticalClass.Add(grammar.GrammaticalClass, grammar);
            }
        }
    }

    // TryAdd reports whether the token was new, so no separate ContainsKey lookup is needed.
    if (wordDictionary.TryAdd(word.Token, word))
    {
        return;
    }

    if (word.GrammaticalClass == null)
    {
        return;
    }

    // The token is already registered: merge the classes it does not know yet.
    Word existing = wordDictionary[word.Token];
    if (existing.GrammaticalClass == null)
    {
        // The stored entry may have been registered without any classification.
        existing.GrammaticalClass = new Dictionary<GrammaticalClassEnum, Grammar>();
    }

    foreach (KeyValuePair<GrammaticalClassEnum, Grammar> pair in word.GrammaticalClass)
    {
        if (!existing.GrammaticalClass.ContainsKey(pair.Key))
        {
            existing.GrammaticalClass.Add(pair.Key, pair.Value);
        }
    }
}

/// <summary>
/// Builds a <c>Grammar</c> from one piece of definition text, or returns <c>null</c> when the
/// text names no grammatical class.
/// </summary>
private static Grammar ParseGrammarFromText(string text)
{
    // Checked in this order; when several class names appear in the text the LAST match wins.
    GrammaticalClassEnum[] candidates =
    {
        GrammaticalClassEnum.Substantivo,
        GrammaticalClassEnum.Artigo,
        GrammaticalClassEnum.Adjetivo,
        GrammaticalClassEnum.Numeral,
        GrammaticalClassEnum.Pronome,
        GrammaticalClassEnum.Verbo,
        GrammaticalClassEnum.Advérbio,
        GrammaticalClassEnum.Preposição,
        GrammaticalClassEnum.Conjunção,
        GrammaticalClassEnum.Interjeição,
    };

    // Check which grammatical classes the word can take.
    Grammar grammar = null;
    foreach (GrammaticalClassEnum candidate in candidates)
    {
        if (TextMentions(text, candidate.ToString()))
        {
            grammar = new Grammar() { GrammaticalClass = candidate };
        }
    }

    if (grammar == null)
    {
        return null;
    }

    // Check the number variation of the word (Singular takes precedence when both appear).
    if (TextMentions(text, NumberVariationEnum.Plural.ToString()))
    {
        grammar.NumberVariation = NumberVariationEnum.Plural;
    }
    if (TextMentions(text, NumberVariationEnum.Singular.ToString()))
    {
        grammar.NumberVariation = NumberVariationEnum.Singular;
    }

    // Check the gender of the word (Masculino takes precedence when both appear).
    if (TextMentions(text, GenderVariationEnum.Feminino.ToString()))
    {
        grammar.GenderVariation = GenderVariationEnum.Feminino;
    }
    if (TextMentions(text, GenderVariationEnum.Masculino.ToString()))
    {
        grammar.GenderVariation = GenderVariationEnum.Masculino;
    }

    return grammar;
}

/// <summary>
/// Case-insensitive, culture-independent substring test. A match at index 0 counts.
/// </summary>
private static bool TextMentions(string text, string term)
{
    return text.IndexOf(term, StringComparison.OrdinalIgnoreCase) >= 0;
}
/// <summary>
/// Takes every word currently held in <c>wordDictionary</c>, ordered by token, wraps each one in
/// a fresh <c>Word</c> that also carries string-based ("friendly") and JSON renderings of its
/// grammatical classes, and writes the resulting <c>WordRepository</c> as JSON to
/// <c>jsonRepoPath</c>.
/// </summary>
private static void SaveWords()
{
    List<Word> sortedWords = wordDictionary.Values.OrderBy(entry => entry.Token).ToList();

    WordRepository repository = new WordRepository();
    repository.WordList = new List<Word>();

    foreach (Word source in sortedWords)
    {
        // Words without any classification keep a null friendly dictionary.
        Dictionary<string, FriendlyGrammar> friendly = null;
        if (source.GrammaticalClass != null)
        {
            friendly = source.GrammaticalClass.Values.ToDictionary(
                grammar => grammar.GrammaticalClass.ToString(),
                grammar => new FriendlyGrammar()
                {
                    GenderVariation = grammar.GenderVariation.ToString(),
                    NumberVariation = grammar.NumberVariation.ToString(),
                    GrammaticalClass = grammar.GrammaticalClass.ToString(),
                });
        }

        Word copy = new Word()
        {
            Token = source.Token,
            GrammaticalClass = source.GrammaticalClass,
            FriendlyGrammaticalClass = friendly,
            GrammaticalClassAsJson = Newtonsoft.Json.JsonConvert.SerializeObject(source.GrammaticalClass),
        };
        repository.WordList.Add(copy);
    }

    File.WriteAllText(jsonRepoPath, Newtonsoft.Json.JsonConvert.SerializeObject(repository));
}