/// <summary>
        /// Endlessly drains <c>Queue</c> into the database in batches of up to 10000 rows,
        /// pausing one second whenever the queue has been emptied.
        /// </summary>
        /// <remarks>
        /// The method blocks the calling thread and loops forever; it only ends if an exception
        /// (for example from <c>SaveChanges</c>) propagates out of it.
        /// </remarks>
        /// <returns>Never returns normally, so no <see cref="Task"/> is ever produced.</returns>
        public static Task SaveToDataBase()
        {
            const int batchSize = 10000;

            // Saves up to batchSize queued items with a short-lived context and returns how many were saved.
            // A fresh context per batch keeps the change tracker from accumulating every inserted entity.
            int SaveNextBatch()
            {
                if (Queue.IsEmpty)
                {
                    return 0;
                }

                using (var entity = new EnglishWordsEntities())
                {
                    entity.Configuration.AutoDetectChangesEnabled = false;
                    var set = entity.Set<Wiktionary>();

                    int saved = 0;
                    // Only add items that were really dequeued: TryDequeue can fail even after a
                    // non-empty check if another consumer took the item in between.
                    while (saved < batchSize && Queue.TryDequeue(out var dic))
                    {
                        set.Add(dic);
                        saved++;
                    }

                    entity.SaveChanges();
                    return saved;
                }
            }

            do
            {
                // A full batch means there may be more queued; keep going until a partial (or empty) batch.
                while (SaveNextBatch() == batchSize)
                {
                    Console.ForegroundColor = ConsoleColor.Yellow;
                    Console.WriteLine("Insert 10000\tfrom\t" + Queue.Count);
                    Console.ForegroundColor = ConsoleColor.Gray;
                }

                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Insert data\tfrom\t" + Queue.Count);
                Console.ForegroundColor = ConsoleColor.Gray;
                Task.Delay(1000).Wait();
            } while (true);
        }
        /// <summary>
        /// Repeatedly loads up to 100 roots with <c>DictionaryCom == null</c> and <c>Grouped</c> set,
        /// fetches each one from dictionary.com in parallel via <c>GetDataDictionary</c>, and saves the result.
        /// Returns when the query yields no more rows.
        /// </summary>
        /// <remarks>
        /// Despite the name this method is synchronous: it blocks until every batch has completed.
        /// NOTE(review): a row whose download throws is only logged, not saved, so it appears to be
        /// selected again by the next query — confirm this retry-forever behavior is intended.
        /// </remarks>
        public static void StartDownloadAsync()
        {
            var vocabularyDir = AppDomain.CurrentDomain.BaseDirectory + "\\VocabularyCom\\";
            var dictionaryDir = AppDomain.CurrentDomain.BaseDirectory + "\\DictionaryCom\\";

            Directory.CreateDirectory(vocabularyDir);
            Directory.CreateDirectory(dictionaryDir);

            while (true)
            {
                // The reading context stays alive until the batch finishes because the worker tasks
                // still use the rows it loaded.
                using (var entity = new EnglishWordsEntities())
                {
                    var rows = entity.Roots.Where(x => x.DictionaryCom == null && x.Grouped).Take(100).ToList();
                    if (rows.Count == 0)
                    {
                        break;
                    }

                    var taskList = new List<Task>();
                    foreach (var row in rows)
                    {
                        taskList.Add(Task.Factory.StartNew(() =>
                        {
                            try
                            {
                                // .Result blocks until the download completes.
                                var result = GetDataDictionary(row, dictionaryDir + row.ID + ".txt",
                                                               "https://www.dictionary.com/noresult?term=" + row.Word).Result;

                                using (var entity2 = new EnglishWordsEntities())
                                {
                                    entity2.Roots.AddOrUpdate(result);
                                    entity2.SaveChanges();
                                }

                                if (result.DictionaryCom.HasValue && result.DictionaryCom.Value)
                                {
                                    Console.ForegroundColor = ConsoleColor.DarkGreen;
                                    Console.WriteLine("Dictionary Found ... " + row.Word);
                                }
                                else
                                {
                                    Console.ForegroundColor = ConsoleColor.Yellow;
                                    Console.WriteLine("Dictionary Not Found ... " + row.Word);
                                }
                            }
                            catch (Exception e)
                            {
                                Console.ForegroundColor = ConsoleColor.Red;
                                Console.WriteLine(e);
                            }
                        }));
                    }

                    Task.WaitAll(taskList.ToArray());
                }
            }
        }
// Example #3
// 0
 /// <summary>
 /// Stores the translation of one word for one language and, once the word has a translation
 /// row for every language, flags the word as translated.
 /// </summary>
 /// <param name="data">Supplies the word id, the language id and the value stored in <c>AllData</c>.</param>
 /// <param name="objectT">Supplies the translated text and the list of alternative words.</param>
 internal void SaveResourceTranslated(GetWordForTranslate_Result data, CallBankService objectT)
 {
     entity.WordTranslates.AddOrUpdate(new WordTranslate
     {
         WordID     = data.WordID,
         LanguageId = data.LangId,
         Translated = objectT.Text,
         // string.Join yields "" for an empty list, whereas Aggregate without a seed
         // throws InvalidOperationException on an empty sequence.
         AllWords   = string.Join(", ", objectT.All).Trim(' ', ','),
         AllData    = data.Translated,
         CreateDate = DateTime.Now
     });
     entity.SaveChanges();

     // The word counts as fully translated when it has as many translation rows as there are languages.
     var translatedCount = entity.WordTranslates.Count(x => x.WordID == data.WordID);
     if (translatedCount == entity.Languages.Count())
     {
         entity.AllWordFromPaymons.Where(x => x.ID == data.WordID)
         .UpdateFromQuery(x => new AllWordFromPaymon {
             Translated = true
         });
     }
 }
        /// <summary>
        /// Assigns a parent to every root that has none, using the word-family JSON stored on the roots:
        /// for each parentless root, the first word-family entry with the same word names its parent,
        /// and the root with that word becomes the parent.
        /// </summary>
        /// <remarks>
        /// Updates run on worker tasks in batches of roughly 20; the method returns only after the
        /// last batch has finished.
        /// </remarks>
        public static void MakeWordFamiliy()
        {
            using (var entity = new EnglishWordsEntities())
            {
                var allroot = entity.Roots.ToList();

                // Flatten all stored word-family lists; a payload that deserializes to null is skipped.
                var wordFamilies = allroot
                                   .Where(x => !string.IsNullOrEmpty(x.VocabularyWordFamilyJson))
                                   .Select(x => JsonConvert.DeserializeObject <List <WordFamily> >(x.VocabularyWordFamilyJson))
                                   .Where(list => list != null)
                                   .SelectMany(list => list)
                                   .ToList();

                var distincted = wordFamilies.Select(x => new WordFamilySimple()
                {
                    parent = x.parent,
                    word   = x.word,
                }).Distinct().ToList();

                var taskList     = new List <Task>();
                var needToSearch = allroot.Where(x => x.Parent == null).ToList();

                foreach (var t in needToSearch)
                {
                    taskList.Add(Task.Factory.StartNew(() =>
                    {
                        var r   = t;
                        var dis = distincted.FirstOrDefault(x => x.word == r.Word);
                        if (dis == null)
                        {
                            return;
                        }

                        var parent = allroot.FirstOrDefault(x => x.Word == dis.parent);
                        if (parent == null)
                        {
                            return;
                        }

                        Console.WriteLine($"Found Parent {parent.Word}\t{r.Word}");
                        r.Parent = parent.ID;
                        using (var entity2 = new EnglishWordsEntities())
                        {
                            entity2.Roots.AddOrUpdate(r);
                            entity2.SaveChanges();
                        }
                    }));

                    if (taskList.Count > 20)
                    {
                        Task.WaitAll(taskList.ToArray());
                        taskList.Clear();
                    }
                }

                // Wait for the final, partially filled batch so no update is still in flight on return.
                Task.WaitAll(taskList.ToArray());
            }
        }
        /// <summary>
        /// Reads the downloaded vocabulary.com pages, extracts the <c>vcom:wordfamily</c> JSON from each one
        /// and stores it in <c>VocabularyWordFamilyJson</c> of the root whose ID matches the file name.
        /// Roots that already have word-family JSON are skipped.
        /// </summary>
        /// <remarks>
        /// Files are processed on worker tasks in batches of roughly 10; the method returns only after the
        /// last batch has finished. Files whose name (minus ".txt") is not an integer are ignored.
        /// </remarks>
        public static void ExtractWordFamiliy()
        {
            var files = Directory.GetFiles(@"F:\Projects\Geeksltd\Netflix\ConsoleAppWordProcess\bin\Debug\VocabularyCom", "*.*");

            using (var entity2 = new EnglishWordsEntities())
            {
                var idsHaveData = entity2
                                  .Roots
                                  .Where(x => !string.IsNullOrEmpty(x.VocabularyWordFamilyJson))
                                  .Select(x => x.ID)
                                  .ToList();

                var mustToRead = files
                                 .Select(x =>
                                 {
                                     // The file name without ".txt" is the root ID; anything else is not a word file.
                                     int parsed;
                                     bool ok = int.TryParse(Path.GetFileName(x).Replace(".txt", ""), out parsed);
                                     return new { FilePath = x, ID = ok ? parsed : (int?)null };
                                 })
                                 .Where(x => x.ID.HasValue && !idsHaveData.Contains(x.ID.Value))
                                 .Select(x => new { x.FilePath, ID = x.ID.Value })
                                 .ToList();

                var listTask = new List <Task>();

                foreach (var file in mustToRead)
                {
                    listTask.Add(Task.Factory.StartNew(() =>
                    {
                        var doc = new HtmlDocument();
                        doc.Load(file.FilePath);

                        var wordFamilyNode = doc.DocumentNode.Descendants("vcom:wordfamily").FirstOrDefault();
                        if (wordFamilyNode == null)
                        {
                            return;
                        }

                        var jsonData = System.Web.HttpUtility.HtmlDecode(wordFamilyNode.GetAttributeValue("data", ""));
                        if (string.IsNullOrEmpty(jsonData))
                        {
                            // A missing or empty attribute would deserialize to null.
                            return;
                        }

                        var wordFamily = JsonConvert.DeserializeObject <List <WordFamily> >(jsonData);
                        if (wordFamily == null || wordFamily.Count == 0)
                        {
                            return;
                        }

                        var id = file.ID;
                        using (var entity = new EnglishWordsEntities())
                        {
                            var word = entity.Roots.FirstOrDefault(x => x.ID == id);
                            if (word == null)
                            {
                                Console.WriteLine("No root found for file " + file.FilePath);
                                return;
                            }

                            word.VocabularyWordFamilyJson = jsonData;
                            entity.Roots.AddOrUpdate(word);
                            entity.SaveChanges();
                            Console.WriteLine("Saved " + word.Word + " count: " + wordFamily.Count);
                        }
                    }));

                    if (listTask.Count > 10)
                    {
                        Task.WaitAll(listTask.ToArray());
                        listTask.Clear();
                    }
                }

                // Wait for the final, partially filled batch so no save is still in flight on return.
                Task.WaitAll(listTask.ToArray());
            }
        }
        /// <summary>
        /// Counts how often each word occurs in <c>D:\temp\txt\txt.txt</c> (after <c>NormalString</c> and
        /// splitting on spaces) and writes the count to the first root whose <c>Word</c> equals that word.
        /// </summary>
        /// <remarks>
        /// Updates run on worker tasks in batches of roughly 20; the method returns only after the
        /// last batch has finished.
        /// </remarks>
        public static void MakeCount()
        {
            var allLines     = File.ReadAllLines(@"D:\temp\txt\txt.txt");
            var processWords = new List <string>();

            foreach (var line in allLines)
            {
                processWords.AddRange(NormalString(line).Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries));
            }

            var groupedWord = processWords.GroupBy(x => x).ToDictionary(kvp => kvp.Key, kvp => kvp.Count());

            using (var entity = new EnglishWordsEntities())
            {
                var allroot  = entity.Roots.ToList();
                var taskList = new List <Task>();

                foreach (var word in groupedWord)
                {
                    taskList.Add(Task.Factory.StartNew(() =>
                    {
                        var w = allroot.FirstOrDefault(x => x.Word == word.Key);
                        if (w == null)
                        {
                            return;
                        }

                        w.Count = word.Value;
                        using (var entity2 = new EnglishWordsEntities())
                        {
                            entity2.Roots.AddOrUpdate(w);
                            entity2.SaveChanges();
                        }
                        Console.WriteLine($"{w.Word}\t{word.Value}");
                    }));

                    if (taskList.Count > 20)
                    {
                        Task.WaitAll(taskList.ToArray());
                        taskList.Clear();
                    }
                }

                // Wait for the final, partially filled batch so no update is still in flight on return.
                Task.WaitAll(taskList.ToArray());
            }
        }
        /// <summary>
        /// Reads <c>D:\temp\txt\words_Rank.csv</c> and, for each line whose first token is an integer,
        /// stores that integer in <c>RankFromPaymon</c> of every root whose <c>Word</c> matches one of the
        /// remaining tokens on the line.
        /// </summary>
        /// <remarks>
        /// Lines are processed on worker tasks in batches of roughly 10; the method returns only after the
        /// last batch has finished.
        /// </remarks>
        public static void GetWordRank()
        {
            var allWords = File.ReadAllLines(@"D:\temp\txt\words_Rank.csv");
            var taskList = new List <Task>();

            foreach (var allWord in allWords)
            {
                taskList.Add(Task.Factory.StartNew(() =>
                {
                    var splited = allWord.Split(new[] { ',', ' ', '"' }, StringSplitOptions.RemoveEmptyEntries);
                    if (splited.Length == 0 || !int.TryParse(splited[0], out var id))
                    {
                        return;
                    }

                    // Everything after the leading rank is a word that shares it.
                    var listWord = splited.Skip(1).ToList();

                    using (var entity = new EnglishWordsEntities())
                    {
                        var roots = entity.Roots.Where(x => listWord.Contains(x.Word)).ToList();
                        foreach (var root in roots)
                        {
                            root.RankFromPaymon = id;
                            entity.Roots.AddOrUpdate(root);
                        }
                        entity.SaveChanges();
                    }
                    Console.WriteLine("Saved Words ... " + allWord);
                }));

                if (taskList.Count > 10)
                {
                    Task.WaitAll(taskList.ToArray());
                    taskList.Clear();
                }
            }

            // After the loop at most 10 tasks remain, so they must be awaited unconditionally;
            // a "> 10" guard here could never be true.
            Task.WaitAll(taskList.ToArray());
        }
        /// <summary>
        /// For every <c>RankAndCounts</c> row without a <c>NumberTimes</c> value, sums the <c>Count</c> of
        /// all roots whose lower-cased, trimmed word equals the row's word or one of its comma-separated
        /// other forms, and stores that sum in <c>NumberTimes</c>.
        /// </summary>
        /// <remarks>
        /// Updates run on worker tasks in batches of roughly 20; the method returns only after the
        /// last batch has finished.
        /// </remarks>
        public static void MakeCountFowWord()
        {
            using (var entity = new EnglishWordsEntities())
            {
                Console.WriteLine($"Fetching Roots");
                var allroot = entity.Roots.Select(x => new
                {
                    x.ID,
                    Word = x.Word.ToLower().Trim(),
                    x.Count
                }).ToList();

                Console.WriteLine($"Fetching RankAndCounts");
                var paymonWord = entity.RankAndCounts.Where(x => x.NumberTimes == null).ToList();
                var taskList   = new List <Task>();

                foreach (var word in paymonWord)
                {
                    taskList.Add(Task.Factory.StartNew(() =>
                    {
                        // A missing word or other-forms value is treated as empty instead of throwing.
                        var mainForm   = (word.Word ?? string.Empty).Trim().ToLower();
                        var otherForms = (word.OtherForms ?? string.Empty).Trim().ToLower();
                        var rowWord    = (mainForm + "," + otherForms)
                                         .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
                                         .Select(x => x.Trim().ToLower())
                                         .ToList();

                        var sum          = allroot.Where(x => rowWord.Contains(x.Word)).Select(x => x.Count).DefaultIfEmpty(0).Sum();
                        word.NumberTimes = sum;
                        using (var entity2 = new EnglishWordsEntities())
                        {
                            entity2.RankAndCounts.AddOrUpdate(word);
                            entity2.SaveChanges();
                        }

                        // string.Join copes with an empty list, unlike Aggregate without a seed.
                        Console.WriteLine($"{word.Rank}\t{string.Join(",", rowWord)}\t{sum}");
                    }));

                    if (taskList.Count > 20)
                    {
                        Task.WaitAll(taskList.ToArray());
                        taskList.Clear();
                    }
                }

                // Wait for the final, partially filled batch so no update is still in flight on return.
                Task.WaitAll(taskList.ToArray());
            }
        }
        /// <summary>
        /// Repeatedly loads up to 10 roots with <c>VocabularyCom == null</c>, fetches each one from
        /// vocabulary.com in parallel via <c>GetDataVocabulary</c>, and saves the result.
        /// Returns when the query yields no more rows.
        /// </summary>
        /// <param name="obj">Not used by this method.</param>
        /// <remarks>
        /// A 403 response resets <c>VocabularyCom</c> to null before saving, so that row matches the
        /// selection query again on a later pass.
        /// NOTE(review): rows whose download throws are only logged, so they also appear to be selected
        /// again — confirm this retry-forever behavior is intended.
        /// </remarks>
        public static void StartDownloadVoabulary(object obj)
        {
            var vocabularyDir = AppDomain.CurrentDomain.BaseDirectory + "\\VocabularyCom\\";

            Directory.CreateDirectory(vocabularyDir);

            while (true)
            {
                // The reading context stays alive until the batch finishes because the worker tasks
                // still use the rows it loaded; the using block disposes it on every exit path.
                using (var entity = new EnglishWordsEntities())
                {
                    var rows = entity.Roots.Where(x => x.VocabularyCom == null).Take(10).ToList();
                    if (rows.Count == 0)
                    {
                        break;
                    }

                    var taskList = new List<Task>();
                    foreach (var row in rows)
                    {
                        taskList.Add(Task.Factory.StartNew(() =>
                        {
                            try
                            {
                                // .Result blocks until the download completes.
                                var result = GetDataVocabulary(row, vocabularyDir + row.ID + ".txt",
                                                               "https://www.vocabulary.com/dictionary/" + row.Word).Result;

                                result.CreateDate = DateTime.Now;
                                if (result.VocabularyComStatusCode == 403)
                                {
                                    result.VocabularyCom = null;
                                }

                                using (var entity2 = new EnglishWordsEntities())
                                {
                                    entity2.Roots.AddOrUpdate(result);
                                    entity2.SaveChanges();
                                }

                                if (result.VocabularyComStatusCode == 200)
                                {
                                    if (result.VocabularyCom != null && result.VocabularyCom.Value)
                                    {
                                        Console.ForegroundColor = ConsoleColor.Green;
                                        Console.WriteLine($"{DateTime.Now} Vocabulary Found ... " + row.Word);
                                    }
                                    else
                                    {
                                        Console.ForegroundColor = ConsoleColor.Yellow;
                                        Console.WriteLine($"{DateTime.Now} Vocabulary Not Found ... " + row.Word);
                                    }
                                }
                                else if (result.VocabularyComStatusCode == 403)
                                {
                                    Console.ForegroundColor = ConsoleColor.Red;
                                    Console.WriteLine($"{DateTime.Now} Vocabulary Forrbiden ... " + row.Word);
                                }
                                else
                                {
                                    Console.ForegroundColor = ConsoleColor.Magenta;
                                    Console.WriteLine($"{DateTime.Now} Vocabulary {result.VocabularyComStatusCode ?? 0}... " + row.Word);
                                }
                            }
                            catch (Exception e)
                            {
                                Console.ForegroundColor = ConsoleColor.Red;
                                Console.WriteLine(e);
                            }
                        }));
                    }

                    Task.WaitAll(taskList.ToArray());
                }
            }
        }
        /// <summary>
        /// Reads the downloaded dictionary.com pages and tries to derive a parent for each root that has
        /// neither a parent nor a <c>DictionaryWordFamily</c> value.
        /// </summary>
        /// <remarks>
        /// Per file (the file name without ".txt" is the root ID):
        /// <list type="bullet">
        /// <item>More than one <c>h1</c>: the root ID is appended to <c>h1.txt</c> and no parent is derived.</item>
        /// <item>A single <c>h1</c> whose text differs (ignoring case) from the root's word: that text is the parent candidate.</item>
        /// <item>Otherwise: the alphabetically last <c>a.luna-xref</c> link text under the parent of
        /// <c>h2#wordOrigin</c> is the candidate, if it differs (ignoring case) from the root's word.</item>
        /// </list>
        /// A candidate is applied only if a root with exactly that lower-cased word exists. Finally, any
        /// <c>vcom:wordfamily</c> JSON found in the page is stored in <c>VocabularyWordFamilyJson</c>.
        /// Files whose name is not an integer, or whose ID has no root, are skipped.
        /// </remarks>
        public static void ExtractWordFamiliyFromDictionary()
        {
            var files = Directory.GetFiles(@"F:\Projects\Geeksltd\Netflix\ConsoleAppWordProcess\bin\Debug\DictionaryCom", "*.*");

            using (var entity2 = new EnglishWordsEntities())
            {
                var allword = entity2.Roots.ToList();

                var idsHaveData = allword
                                  .Where(x => !string.IsNullOrEmpty(x.DictionaryWordFamily) || x.Parent != null)
                                  .Select(x => x.ID)
                                  .ToList();

                var mustToRead = files
                                 .Select(x =>
                                 {
                                     // The file name without ".txt" is the root ID; anything else is not a word file.
                                     int parsed;
                                     bool ok = int.TryParse(Path.GetFileName(x).Replace(".txt", ""), out parsed);
                                     return new { FilePath = x, ID = ok ? parsed : (int?)null };
                                 })
                                 .Where(x => x.ID.HasValue && !idsHaveData.Contains(x.ID.Value))
                                 .Select(x => new { x.FilePath, ID = x.ID.Value })
                                 .ToList();

                foreach (var file in mustToRead)
                {
                    var fileId      = file.ID;
                    var currentWord = allword.FirstOrDefault(x => x.ID == fileId);
                    if (currentWord == null || currentWord.Parent.HasValue)
                    {
                        // No matching root, or the root already has a parent: nothing to do for this file.
                        continue;
                    }

                    var doc = new HtmlDocument();
                    doc.Load(file.FilePath);

                    // Links currentWord to the root whose word equals the given lower-cased candidate, if any.
                    void LinkToParent(string lowered)
                    {
                        var parent = allword.FirstOrDefault(x => x.Word == lowered);
                        if (parent == null)
                        {
                            return;
                        }

                        currentWord.Parent = parent.ID;
                        currentWord.DictionaryWordFamily = lowered;
                        using (var entity = new EnglishWordsEntities())
                        {
                            entity.Roots.AddOrUpdate(currentWord);
                            entity.SaveChanges();
                        }

                        Console.WriteLine($"Found Origin  {lowered}\t{currentWord.Word}");
                    }

                    var currentLowered = currentWord.Word?.ToLower();
                    var h1Elements     = doc.DocumentNode.Descendants("h1").ToList();
                    if (h1Elements.Count > 1)
                    {
                        File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + "\\h1.txt", "\r\n" + currentWord.ID);
                        Console.WriteLine("File Have more than H1 " + currentWord.Word);
                    }
                    else
                    {
                        // A page without any h1 yields null here and falls through to the origin section.
                        var h1 = h1Elements.FirstOrDefault()?.InnerText;
                        if (h1 != null && h1.ToLower() != currentLowered)
                        {
                            LinkToParent(h1.ToLower());
                        }
                        else
                        {
                            var section = doc.DocumentNode.SelectSingleNode("//h2[@id='wordOrigin']");
                            var origin  = section?.ParentNode?.Descendants()
                                          .Where(x => x.Name == "a" && x.GetAttributeValue("class", "") == "luna-xref")
                                          .Select(x => x.InnerText)
                                          .OrderByDescending(x => x)
                                          .FirstOrDefault();

                            // Compare case-insensitively, the same way the h1 text is compared above.
                            if (origin != null && origin.ToLower() != currentLowered)
                            {
                                LinkToParent(origin.ToLower());
                            }
                        }
                    }

                    var wordFamilyNode = doc.DocumentNode.Descendants("vcom:wordfamily").FirstOrDefault();
                    if (wordFamilyNode == null)
                    {
                        continue;
                    }

                    var jsonData = System.Web.HttpUtility.HtmlDecode(wordFamilyNode.GetAttributeValue("data", ""));
                    if (string.IsNullOrEmpty(jsonData))
                    {
                        // A missing or empty attribute would deserialize to null.
                        continue;
                    }

                    var wordFamily = JsonConvert.DeserializeObject <List <WordFamily> >(jsonData);
                    if (wordFamily == null || wordFamily.Count == 0)
                    {
                        continue;
                    }

                    using (var entity = new EnglishWordsEntities())
                    {
                        var word = entity.Roots.FirstOrDefault(x => x.ID == fileId);
                        if (word == null)
                        {
                            continue;
                        }

                        word.VocabularyWordFamilyJson = jsonData;
                        entity.Roots.AddOrUpdate(word);
                        entity.SaveChanges();
                        Console.WriteLine("Saved " + word.Word + " count: " + wordFamily.Count);
                    }
                }
            }
        }