/// <summary>
/// Drains <c>Queue</c> into the database forever, inserting <see cref="Wiktionary"/> rows in batches.
/// </summary>
/// <remarks>
/// The outer loop never exits, so the method blocks its caller and the declared <see cref="Task"/>
/// is never actually returned. Every batch of up to 10000 rows is saved through its own short-lived
/// context so that already-inserted entities are not kept tracked for the lifetime of the process.
/// </remarks>
public static Task SaveToDataBase()
{
    const int batchSize = 10000;

    do
    {
        bool drained;
        do
        {
            using (var entity = new EnglishWordsEntities())
            {
                // Rows are only added here, so automatic change detection is switched off.
                entity.Configuration.AutoDetectChangesEnabled = false;

                int counter = 0;

                // Rely on TryDequeue's own result: a separate IsEmpty check can race with another
                // consumer and would hand a null item to Add().
                while (counter < batchSize && Queue.TryDequeue(out var dic))
                {
                    entity.Set<Wiktionary>().Add(dic);
                    counter++;
                }

                entity.SaveChanges();

                // A batch that did not fill up means the queue ran dry during this pass.
                drained = counter < batchSize;
            }

            if (!drained)
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Insert 10000\tfrom\t" + Queue.Count);
                Console.ForegroundColor = ConsoleColor.Gray;
            }
        } while (!drained);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("Insert data\tfrom\t" + Queue.Count);
        Console.ForegroundColor = ConsoleColor.Gray;

        // Idle for a second before polling the queue again.
        Task.Delay(1000).Wait();
    } while (true);
}
/// <summary>
/// Downloads the dictionary.com page for every grouped root whose <c>DictionaryCom</c> flag is still
/// unset, 100 roots at a time, and stores the outcome returned by <c>GetDataDictionary</c>.
/// </summary>
/// <remarks>
/// Despite its name the method is synchronous: it blocks until a page of roots comes back empty.
/// NOTE(review): a root whose download keeps failing is never updated, so it is presumably fetched
/// again on the next page and the loop may not terminate — confirm whether that retry is intended.
/// </remarks>
public static void StartDownloadAsync()
{
    var vocabularyDir = AppDomain.CurrentDomain.BaseDirectory + "\\VocabularyCom\\";
    var dictionaryDir = AppDomain.CurrentDomain.BaseDirectory + "\\DictionaryCom\\";

    // Both folders are created, although only the dictionary.com folder is written to below.
    Directory.CreateDirectory(vocabularyDir);
    Directory.CreateDirectory(dictionaryDir);

    while (true)
    {
        // One context per page, disposed once all of the page's downloads have finished.
        using (var entity = new EnglishWordsEntities())
        {
            var rows = entity.Roots.Where(x => x.DictionaryCom == null && x.Grouped).Take(100).ToList();
            if (rows.Count == 0)
            {
                break;
            }

            var taskList = new List<Task>();
            foreach (var row in rows)
            {
                taskList.Add(Task.Factory.StartNew(() =>
                {
                    try
                    {
                        // Result blocks until the download completes, so no separate Wait() is needed.
                        var result = GetDataDictionary(
                            row,
                            dictionaryDir + row.ID + ".txt",
                            "https://www.dictionary.com/noresult?term=" + row.Word).Result;

                        // Each worker saves through its own context, released as soon as the row is stored.
                        using (var entity2 = new EnglishWordsEntities())
                        {
                            entity2.Roots.AddOrUpdate(result);
                            entity2.SaveChanges();
                        }

                        if (result.DictionaryCom.HasValue && result.DictionaryCom.Value)
                        {
                            Console.ForegroundColor = ConsoleColor.DarkGreen;
                            Console.WriteLine("Dictionary Found ... " + row.Word);
                        }
                        else
                        {
                            Console.ForegroundColor = ConsoleColor.Yellow;
                            Console.WriteLine("Dictionary Not Found ... " + row.Word);
                        }
                    }
                    catch (Exception e)
                    {
                        // Best effort: report the failure and let the other downloads continue.
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine(e);
                    }
                }));
            }

            Task.WaitAll(taskList.ToArray());
        }
    }
}
/// <summary>
/// Stores (or updates) the translation of one word for one language and, once the word has as many
/// translation rows as there are languages, flags the word as translated.
/// </summary>
/// <param name="data">Word/language pair being translated; its <c>Translated</c> value is stored as <c>AllData</c>.</param>
/// <param name="objectT">Translation result supplying the main text and the list of alternative words.</param>
internal void SaveResourceTranslated(GetWordForTranslate_Result data, CallBankService objectT)
{
    // string.Join copes with an empty list, whereas the seedless Aggregate overload throws
    // InvalidOperationException when the sequence has no elements. A missing list is treated as empty.
    var alternatives = objectT.All ?? Enumerable.Empty<string>();
    var allWords = string.Join(", ", alternatives).Trim(' ', ',');

    entity.WordTranslates.AddOrUpdate(new WordTranslate
    {
        WordID = data.WordID,
        LanguageId = data.LangId,
        Translated = objectT.Text,
        AllWords = allWords,
        AllData = data.Translated,
        CreateDate = DateTime.Now
    });
    entity.SaveChanges();

    // The word counts as fully translated when it has one translation row per known language.
    var translatedCount = entity.WordTranslates.Count(x => x.WordID == data.WordID);
    if (translatedCount == entity.Languages.Count())
    {
        entity.AllWordFromPaymons
            .Where(x => x.ID == data.WordID)
            .UpdateFromQuery(x => new AllWordFromPaymon { Translated = true });
    }
}
/// <summary>
/// Assigns a parent to every root that has none, using the word-family data stored as JSON in
/// <c>VocabularyWordFamilyJson</c>: a root's parent is the root whose word equals the family entry's
/// <c>parent</c> value.
/// </summary>
/// <remarks>
/// Updates run on worker tasks in batches of roughly 20, and the method returns only after every
/// task has finished.
/// </remarks>
public static void MakeWordFamiliy()
{
    using (var entity = new EnglishWordsEntities())
    {
        var allroot = entity.Roots.ToList();

        // Flatten all stored word families into (word, parent) pairs.
        // NOTE(review): Distinct() relies on WordFamilySimple's equality; unless that type overrides
        // Equals/GetHashCode it removes nothing — confirm.
        var distincted = allroot
            .Where(x => !string.IsNullOrEmpty(x.VocabularyWordFamilyJson))
            .SelectMany(x => JsonConvert.DeserializeObject<List<WordFamily>>(x.VocabularyWordFamilyJson)
                             ?? new List<WordFamily>())
            .Select(x => new WordFamilySimple() { parent = x.parent, word = x.word })
            .Distinct()
            .ToList();

        // Hash lookups replace a linear scan per root. A lookup keeps elements in source order and
        // accepts null keys, so taking the first element of a group matches FirstOrDefault(predicate).
        var familyByWord = distincted.ToLookup(x => x.word);
        var rootsByWord = allroot.ToLookup(x => x.Word);

        var taskList = new List<Task>();

        // Materialised up front because the tasks below change Parent while this list is walked.
        var needToSearch = allroot.Where(x => x.Parent == null).ToList();
        foreach (var r in needToSearch)
        {
            taskList.Add(Task.Factory.StartNew(() =>
            {
                var dis = familyByWord[r.Word].FirstOrDefault();
                if (dis == null)
                {
                    return;
                }

                var parent = rootsByWord[dis.parent].FirstOrDefault();
                if (parent == null)
                {
                    return;
                }

                Console.WriteLine($"Found Parent {parent.Word}\t{r.Word}");
                r.Parent = parent.ID;

                using (var entity2 = new EnglishWordsEntities())
                {
                    entity2.Roots.AddOrUpdate(r);
                    entity2.SaveChanges();
                }
            }));

            // Throttle: let the current batch finish before queueing more work.
            if (taskList.Count > 20)
            {
                Task.WaitAll(taskList.ToArray());
                taskList.Clear();
            }
        }

        // Wait for the last, partially filled batch; otherwise the method could return (and the
        // contexts be torn down) while updates are still running.
        Task.WaitAll(taskList.ToArray());
    }
}
/// <summary>
/// Reads the saved vocabulary.com pages (files named <c>&lt;root id&gt;.txt</c>), extracts the JSON held
/// in the <c>data</c> attribute of the <c>vcom:wordfamily</c> element and stores it in the matching
/// root's <c>VocabularyWordFamilyJson</c>.
/// </summary>
/// <remarks>
/// Roots that already have word-family JSON are skipped, as are files whose name is not a number.
/// Files are processed on worker tasks in batches of roughly 10, and the method returns only after
/// every task has finished.
/// </remarks>
public static void ExtractWordFamiliy()
{
    var files = Directory.GetFiles(@"F:\Projects\Geeksltd\Netflix\ConsoleAppWordProcess\bin\Debug\VocabularyCom", "*.*");

    using (var entity2 = new EnglishWordsEntities())
    {
        // A lookup gives a hash-based Contains without having to spell out the ID's declared type.
        var idsHaveData = entity2
            .Roots
            .Where(x => !string.IsNullOrEmpty(x.VocabularyWordFamilyJson))
            .Select(x => x.ID)
            .ToList()
            .ToLookup(id => id);

        var mustToRead = files
            .Select(path =>
            {
                // The "*.*" pattern can match files that are not "<id>.txt"; those are skipped
                // instead of aborting the whole run with a FormatException.
                var isNumeric = int.TryParse(Path.GetFileName(path).Replace(".txt", ""), out var parsedId);
                return new { FilePath = path, ID = parsedId, IsNumeric = isNumeric };
            })
            .Where(x => x.IsNumeric && !idsHaveData.Contains(x.ID))
            .ToList();

        var listTask = new List<Task>();
        foreach (var file in mustToRead)
        {
            listTask.Add(Task.Factory.StartNew(() =>
            {
                var doc = new HtmlDocument();
                doc.Load(file.FilePath);

                var wordFamilyNode = doc.DocumentNode.Descendants("vcom:wordfamily").FirstOrDefault();
                if (wordFamilyNode == null)
                {
                    return;
                }

                var jsonData = System.Web.HttpUtility.HtmlDecode(wordFamilyNode.GetAttributeValue("data", ""));
                var wordFamily = JsonConvert.DeserializeObject<List<WordFamily>>(jsonData);

                // A missing or empty attribute deserializes to null; treat it like an empty family.
                if (wordFamily == null || wordFamily.Count == 0)
                {
                    return;
                }

                var id = file.ID;
                using (var entity = new EnglishWordsEntities())
                {
                    var word = entity.Roots.FirstOrDefault(x => x.ID == id);
                    if (word == null)
                    {
                        // The file has no corresponding root, so there is nothing to update.
                        return;
                    }

                    word.VocabularyWordFamilyJson = jsonData;
                    entity.Roots.AddOrUpdate(word);
                    entity.SaveChanges();
                    Console.WriteLine("Saved " + word.Word + " count: " + wordFamily.Count);
                }
            }));

            // Throttle: let the current batch finish before queueing more work.
            if (listTask.Count > 10)
            {
                Task.WaitAll(listTask.ToArray());
                listTask.Clear();
            }
        }

        // Wait for the last, partially filled batch as well.
        Task.WaitAll(listTask.ToArray());
    }
}
/// <summary>
/// Counts how often each word occurs in <c>D:\temp\txt\txt.txt</c> (after <c>NormalString</c> and
/// splitting on spaces) and stores the count on the root whose <c>Word</c> equals that word.
/// </summary>
/// <remarks>
/// Updates run on worker tasks in batches of roughly 20, and the method returns only after every
/// task has finished.
/// </remarks>
public static void MakeCount()
{
    var processWords = File.ReadAllLines(@"D:\temp\txt\txt.txt")
        .SelectMany(line => NormalString(line).Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
        .ToList();

    var groupedWord = processWords
        .GroupBy(x => x)
        .ToDictionary(kvp => kvp.Key, kvp => kvp.Count());

    using (var entity = new EnglishWordsEntities())
    {
        var allroot = entity.Roots.ToList();

        // Hash lookup instead of scanning every root for every word. Groups keep source order, so
        // the first element of a group is the root FirstOrDefault(predicate) would have returned.
        var rootsByWord = allroot.ToLookup(x => x.Word);

        var taskList = new List<Task>();
        foreach (var word in groupedWord)
        {
            taskList.Add(Task.Factory.StartNew(() =>
            {
                var w = rootsByWord[word.Key].FirstOrDefault();
                if (w == null)
                {
                    return;
                }

                w.Count = word.Value;
                using (var entity2 = new EnglishWordsEntities())
                {
                    entity2.Roots.AddOrUpdate(w);
                    entity2.SaveChanges();
                }

                Console.WriteLine($"{w.Word}\t{word.Value}");
            }));

            // Throttle: let the current batch finish before queueing more work.
            if (taskList.Count > 20)
            {
                Task.WaitAll(taskList.ToArray());
                taskList.Clear();
            }
        }

        // Wait for the last, partially filled batch as well.
        Task.WaitAll(taskList.ToArray());
    }
}
/// <summary>
/// Reads <c>D:\temp\txt\words_Rank.csv</c> and, for each line that starts with an integer rank,
/// writes that rank to <c>RankFromPaymon</c> of every root whose word appears on the rest of the line.
/// </summary>
/// <remarks>
/// Lines are processed on worker tasks in batches of roughly 10, and the method returns only after
/// every task has finished.
/// </remarks>
public static void GetWordRank()
{
    var allWords = File.ReadAllLines(@"D:\temp\txt\words_Rank.csv");
    var taskList = new List<Task>();

    foreach (var allWord in allWords)
    {
        taskList.Add(Task.Factory.StartNew(() =>
        {
            var splited = allWord.Split(new[] { ',', ' ', '"' }, StringSplitOptions.RemoveEmptyEntries);
            if (splited.Length == 0)
            {
                return;
            }

            // The first token is the rank; lines that do not start with a number are ignored.
            if (!int.TryParse(splited[0], out var id))
            {
                return;
            }

            var listWord = splited.Skip(1).ToList();

            using (var entity = new EnglishWordsEntities())
            {
                var roots = entity.Roots.Where(x => listWord.Contains(x.Word)).ToList();
                foreach (var root in roots)
                {
                    root.RankFromPaymon = id;
                    entity.Roots.AddOrUpdate(root);
                }

                entity.SaveChanges();
            }

            Console.WriteLine("Saved Words ... " + allWord);
        }));

        // Throttle: let the current batch finish before queueing more work.
        if (taskList.Count > 10)
        {
            Task.WaitAll(taskList.ToArray());
            taskList.Clear();
        }
    }

    // At most 10 tasks remain here, so the former "Count > 10" guard never fired and the last
    // batch was left unawaited. Wait unconditionally instead.
    Task.WaitAll(taskList.ToArray());
    taskList.Clear();
}
/// <summary>
/// For every <c>RankAndCounts</c> row without a <c>NumberTimes</c> value, sums the <c>Count</c> of all
/// roots whose lower-cased, trimmed word matches the row's word or one of its comma-separated other
/// forms, and stores that sum in <c>NumberTimes</c>.
/// </summary>
/// <remarks>
/// Updates run on worker tasks in batches of roughly 20, and the method returns only after every
/// task has finished.
/// </remarks>
public static void MakeCountFowWord()
{
    using (var entity = new EnglishWordsEntities())
    {
        Console.WriteLine("Fetching Roots");
        var allroot = entity.Roots.Select(x => new { x.ID, Word = x.Word.ToLower().Trim(), x.Count }).ToList();

        Console.WriteLine("Fetching RankAndCounts");
        var paymonWord = entity.RankAndCounts.Where(x => x.NumberTimes == null).ToList();

        // Hash lookup so each row only touches the roots that match one of its forms instead of
        // scanning every root.
        var rootsByWord = allroot.ToLookup(x => x.Word);

        var taskList = new List<Task>();
        foreach (var word in paymonWord)
        {
            taskList.Add(Task.Factory.StartNew(() =>
            {
                // A missing Word or OtherForms value is treated as empty rather than throwing.
                var rowWord = ((word.Word ?? string.Empty).Trim().ToLower() + "," + (word.OtherForms ?? string.Empty).Trim().ToLower())
                    .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
                    .Select(x => x.Trim().ToLower())
                    .ToList();

                // Distinct() keeps a root from being counted twice when a form is listed twice.
                var sum = rowWord
                    .Distinct()
                    .SelectMany(form => rootsByWord[form])
                    .Select(x => x.Count)
                    .DefaultIfEmpty(0)
                    .Sum();

                word.NumberTimes = sum;
                using (var entity2 = new EnglishWordsEntities())
                {
                    entity2.RankAndCounts.AddOrUpdate(word);
                    entity2.SaveChanges();
                }

                // string.Join tolerates an empty list, unlike the seedless Aggregate overload.
                Console.WriteLine($"{word.Rank}\t{string.Join(",", rowWord)}\t{sum}");
            }));

            // Throttle: let the current batch finish before queueing more work.
            if (taskList.Count > 20)
            {
                Task.WaitAll(taskList.ToArray());
                taskList.Clear();
            }
        }

        // Wait for the last, partially filled batch as well.
        Task.WaitAll(taskList.ToArray());
    }
}
/// <summary>
/// Downloads the vocabulary.com page for every root whose <c>VocabularyCom</c> flag is still unset,
/// 10 roots at a time, and stores the outcome returned by <c>GetDataVocabulary</c>.
/// </summary>
/// <param name="obj">Not used by the method body.</param>
/// <remarks>
/// A 403 response resets <c>VocabularyCom</c> to null before saving, so such a root is selected again
/// by a later page. The method blocks until a page of roots comes back empty.
/// </remarks>
public static void StartDownloadVoabulary(object obj)
{
    var vocabularyDir = AppDomain.CurrentDomain.BaseDirectory + "\\VocabularyCom\\";
    Directory.CreateDirectory(vocabularyDir);

    while (true)
    {
        // One context per page; "using" also releases it on the final, empty page.
        using (var entity = new EnglishWordsEntities())
        {
            var rows = entity.Roots.Where(x => x.VocabularyCom == null).Take(10).ToList();
            if (rows.Count == 0)
            {
                break;
            }

            var taskList = new List<Task>();
            foreach (var row in rows)
            {
                taskList.Add(Task.Factory.StartNew(() =>
                {
                    try
                    {
                        // Result blocks until the download completes, so no separate Wait() is needed.
                        var result = GetDataVocabulary(
                            row,
                            vocabularyDir + row.ID + ".txt",
                            "https://www.vocabulary.com/dictionary/" + row.Word).Result;

                        result.CreateDate = DateTime.Now;
                        if (result.VocabularyComStatusCode == 403)
                        {
                            // Leave the flag unset so the root is picked up again later.
                            result.VocabularyCom = null;
                        }

                        // Each worker saves through its own context, released as soon as the row is stored.
                        using (var entity2 = new EnglishWordsEntities())
                        {
                            entity2.Roots.AddOrUpdate(result);
                            entity2.SaveChanges();
                        }

                        if (result.VocabularyComStatusCode == 200)
                        {
                            if (result.VocabularyCom != null && result.VocabularyCom.Value)
                            {
                                Console.ForegroundColor = ConsoleColor.Green;
                                Console.WriteLine($"{DateTime.Now} Vocabulary Found ... " + row.Word);
                            }
                            else
                            {
                                Console.ForegroundColor = ConsoleColor.Yellow;
                                Console.WriteLine($"{DateTime.Now} Vocabulary Not Found ... " + row.Word);
                            }
                        }
                        else if (result.VocabularyComStatusCode == 403)
                        {
                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine($"{DateTime.Now} Vocabulary Forrbiden ... " + row.Word);
                        }
                        else
                        {
                            Console.ForegroundColor = ConsoleColor.Magenta;
                            Console.WriteLine($"{DateTime.Now} Vocabulary {result.VocabularyComStatusCode ?? 0}... " + row.Word);
                        }
                    }
                    catch (Exception e)
                    {
                        // Best effort: report the failure and let the other downloads continue.
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine(e);
                    }
                }));
            }

            // With every context disposed deterministically, no forced GC.Collect() is needed.
            Task.WaitAll(taskList.ToArray());
        }
    }
}
/// <summary>
/// Reads the saved dictionary.com pages (files named <c>&lt;root id&gt;.txt</c>) and tries to find a
/// parent for each root that has neither a parent nor a <c>DictionaryWordFamily</c> value yet.
/// </summary>
/// <remarks>
/// For each page the parent candidate is, in order:
/// the text of the single <c>h1</c> when it differs from the root's own word; otherwise the
/// alphabetically last <c>a.luna-xref</c> link text found under the parent of the
/// <c>h2#wordOrigin</c> element. Pages with more than one <c>h1</c> are only recorded in
/// <c>h1.txt</c>. Independently of that, a <c>vcom:wordfamily</c> element, when present, is stored
/// as the root's <c>VocabularyWordFamilyJson</c>. Files whose name is not a number, or that have no
/// matching root, are skipped.
/// </remarks>
public static void ExtractWordFamiliyFromDictionary()
{
    var files = Directory.GetFiles(@"F:\Projects\Geeksltd\Netflix\ConsoleAppWordProcess\bin\Debug\DictionaryCom", "*.*");

    using (var entity2 = new EnglishWordsEntities())
    {
        var allword = entity2.Roots.ToList();

        // Hash lookups replace the repeated linear scans. Groups keep source order, so the first
        // element of a group is what FirstOrDefault(predicate) would have returned.
        var rootsById = allword.ToLookup(x => x.ID);
        var rootsByWord = allword.ToLookup(x => x.Word);

        var mustToRead = files
            .Select(path =>
            {
                // The "*.*" pattern can match files that are not "<id>.txt"; those are skipped
                // instead of aborting the whole run with a FormatException.
                var isNumeric = int.TryParse(Path.GetFileName(path).Replace(".txt", ""), out var parsedId);
                return new { FilePath = path, ID = parsedId, IsNumeric = isNumeric };
            })
            .Where(x => x.IsNumeric
                        && !rootsById[x.ID].Any(r => !string.IsNullOrEmpty(r.DictionaryWordFamily) || r.Parent != null))
            .ToList();

        foreach (var file in mustToRead)
        {
            var currentWord = rootsById[file.ID].FirstOrDefault();
            if (currentWord == null)
            {
                // No root belongs to this file, so there is nothing to update.
                continue;
            }

            if (currentWord.Parent.HasValue)
            {
                continue;
            }

            var doc = new HtmlDocument();
            doc.Load(file.FilePath);

            var h1Elements = doc.DocumentNode.Descendants("h1").ToList();
            if (h1Elements.Count > 1)
            {
                File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + "\\h1.txt", "\r\n" + currentWord.ID);
                Console.WriteLine("File Have more than H1 " + currentWord.Word);
            }
            else
            {
                // A page without any h1 falls through to the word-origin section.
                var h1 = h1Elements.FirstOrDefault()?.InnerText;

                string candidate = null;
                if (h1 != null && h1.ToLower() != currentWord.Word.ToLower())
                {
                    candidate = h1.ToLower();
                }
                else
                {
                    var section = doc.DocumentNode.SelectSingleNode("//h2[@id='wordOrigin']");
                    if (section != null && section.ParentNode != null)
                    {
                        var origin = section.ParentNode.Descendants()
                            .Where(x => x.Name == "a" && x.GetAttributeValue("class", "") == "luna-xref")
                            .Select(x => x.InnerText)
                            .OrderByDescending(x => x)
                            .FirstOrDefault();

                        if (origin != null && origin.ToLower() != currentWord.Word)
                        {
                            candidate = origin.ToLower();
                        }
                    }
                }

                if (candidate != null)
                {
                    var parent = rootsByWord[candidate].FirstOrDefault();
                    if (parent != null)
                    {
                        currentWord.Parent = parent.ID;
                        currentWord.DictionaryWordFamily = candidate;

                        using (var entity = new EnglishWordsEntities())
                        {
                            entity.Roots.AddOrUpdate(currentWord);
                            entity.SaveChanges();
                        }

                        Console.WriteLine($"Found Origin {candidate}\t{currentWord.Word}");
                    }
                }
            }

            var wordFamilyNode = doc.DocumentNode.Descendants("vcom:wordfamily").FirstOrDefault();
            if (wordFamilyNode == null)
            {
                continue;
            }

            var jsonData = System.Web.HttpUtility.HtmlDecode(wordFamilyNode.GetAttributeValue("data", ""));
            var wordFamily = JsonConvert.DeserializeObject<List<WordFamily>>(jsonData);

            // A missing or empty attribute deserializes to null; treat it like an empty family.
            if (wordFamily == null || wordFamily.Count == 0)
            {
                continue;
            }

            var id = file.ID;
            using (var entity = new EnglishWordsEntities())
            {
                var word = entity.Roots.FirstOrDefault(x => x.ID == id);
                if (word == null)
                {
                    continue;
                }

                word.VocabularyWordFamilyJson = jsonData;
                entity.Roots.AddOrUpdate(word);
                entity.SaveChanges();
                Console.WriteLine("Saved " + word.Word + " count: " + wordFamily.Count);
            }
        }
    }
}