/// <summary>
/// Loads up to 10,000 translation rows of one language whose <c>Proccessed</c> flag is still null,
/// joined with their source word and ordered by that word.
/// </summary>
/// <param name="languageId">Language whose <c>WordTranslates</c> rows are fetched.</param>
/// <returns>The projected rows; an empty list when nothing matches.</returns>
public static List<Result1> GetData(int languageId)
        {
            // Dispose the context once the rows are materialised; it used to be leaked on every call.
            using (var entity = new EnglishWordsEntities())
            {
                // Raise the command timeout to its maximum for this query.
                entity.Database.CommandTimeout = int.MaxValue;

                List<Result1> allData;

                Console.WriteLine("Fetching before lock Data...");
                // ob1 lets only one caller at a time run the query.
                lock (ob1)
                {
                    Console.WriteLine("Fetching Data...");
                    allData = (from f in entity.AllWordFromPaymons
                               join t in entity.WordTranslates on f.ID equals t.WordID
                               where t.LanguageId == languageId && t.Proccessed == null
                               orderby f.Word
                               select new Result1
                               {
                                   WordId = t.WordID,
                                   LanId = t.LanguageId,
                                   Word = f.Word,
                                   AllData = t.AllData,
                                   First = "",
                               }).Take(10000).ToList();
                }
                Console.WriteLine("Fetched Data");
                return allData;
            }
        }
        /// <summary>
        /// Endlessly drains <c>Queue</c> into the <c>Wiktionary</c> set, saving every 10,000 items
        /// and polling again one second after the queue runs dry.
        /// </summary>
        /// <remarks>
        /// The loop has no exit, so the method blocks its calling thread and never returns a
        /// <see cref="Task"/> even though one is declared.
        /// </remarks>
        public static Task SaveToDataBase()
        {
            const int batchSize = 10000;

            do
            {
                // A fresh context per drain pass: a single long-lived context would keep tracking
                // every inserted entity for the lifetime of the process.
                using (var entity = new EnglishWordsEntities())
                {
                    entity.Configuration.AutoDetectChangesEnabled = false;

                    int counter = 0;
                    // Use the TryDequeue result as the loop condition. Checking IsEmpty first and
                    // ignoring the result could add a null item if the queue was emptied in between.
                    while (Queue.TryDequeue(out var dic))
                    {
                        entity.Set<Wiktionary>().Add(dic);
                        counter++;
                        if (counter >= batchSize)
                        {
                            entity.SaveChanges();
                            counter = 0;
                            Console.ForegroundColor = ConsoleColor.Yellow;
                            Console.WriteLine("Insert 10000\tfrom\t" + Queue.Count);
                            Console.ForegroundColor = ConsoleColor.Gray;
                        }
                    }

                    // Flush whatever is left of the last partial batch.
                    entity.SaveChanges();
                }

                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Insert data\tfrom\t" + Queue.Count);
                Console.ForegroundColor = ConsoleColor.Gray;
                Task.Delay(1000).Wait();
            } while (true);
        }
        /// <summary>
        /// Repeatedly takes up to 100 grouped roots whose <c>DictionaryCom</c> flag is null, fetches each one
        /// on its own task through <c>GetDataDictionary</c>, saves the result, and stops once no such rows remain.
        /// </summary>
        /// <remarks>Despite its name the method is synchronous: it blocks until every batch has completed.</remarks>
        public static void StartDownloadAsync()
        {
            var VocabularyCom = AppDomain.CurrentDomain.BaseDirectory + "\\VocabularyCom\\";
            var DictionaryCom = AppDomain.CurrentDomain.BaseDirectory + "\\DictionaryCom\\";

            Directory.CreateDirectory(VocabularyCom);
            Directory.CreateDirectory(DictionaryCom);

            while (true)
            {
                // The batch context is disposed only after all tasks using its rows have finished.
                using (var entity = new EnglishWordsEntities())
                {
                    var rows = entity.Roots.Where(x => x.DictionaryCom == null && x.Grouped).Take(100).ToList();
                    if (rows.Count == 0)
                    {
                        break;
                    }

                    var taskList = new List<Task>();
                    foreach (var row in rows)
                    {
                        taskList.Add(Task.Factory.StartNew(() =>
                        {
                            try
                            {
                                // .Result blocks this worker until the download has finished.
                                var result = GetDataDictionary(row, DictionaryCom + row.ID + ".txt",
                                                               "https://www.dictionary.com/noresult?term=" + row.Word).Result;

                                // Each task saves through its own short-lived context.
                                using (var entity2 = new EnglishWordsEntities())
                                {
                                    entity2.Roots.AddOrUpdate(result);
                                    entity2.SaveChanges();
                                }

                                if (result.DictionaryCom.HasValue && result.DictionaryCom.Value)
                                {
                                    Console.ForegroundColor = ConsoleColor.DarkGreen;
                                    Console.WriteLine("Dictionary Found ... " + row.Word);
                                }
                                else
                                {
                                    Console.ForegroundColor = ConsoleColor.Yellow;
                                    Console.WriteLine("Dictionary Not Found ... " + row.Word);
                                }
                            }
                            catch (Exception e)
                            {
                                // NOTE(review): a failure is only printed and nothing is saved, so the row
                                // presumably still has DictionaryCom == null and is selected again by the
                                // next batch - confirm this cannot loop forever on a permanently failing row.
                                Console.ForegroundColor = ConsoleColor.Red;
                                Console.WriteLine(e);
                            }
                        }));
                    }

                    Task.WaitAll(taskList.ToArray());
                }
            }
        }
        /// <summary>
        /// Picks one language that still has rows with a null <c>Proccessed</c> flag, loads a batch for it with
        /// <see cref="GetData"/>, splits the batch into ten ranges, and repeats until no such language is left.
        /// </summary>
        public static void ProcessAll()
        {
            const int partitions = 10;

            // Iterate instead of calling ProcessAll() recursively: one stack frame per batch
            // could eventually overflow the stack on a long run.
            while (true)
            {
                Console.Clear();
                Console.WriteLine("Start ProcessAll");

                int? languageId;
                using (var entity = new EnglishWordsEntities())
                {
                    languageId = entity.WordTranslates
                                 .Where(x => x.Proccessed == null)
                                 .Select(x => (int?)x.LanguageId)
                                 .Distinct()
                                 .Take(1)
                                 .FirstOrDefault();
                }

                if (!languageId.HasValue)
                {
                    return;
                }

                var taskList = new List<Task>();
                var dataList = GetData(languageId.Value);
                var count    = dataList.Count / partitions;

                for (var id = 0; id < partitions; id++)
                {
                    // The last range also takes the remainder left over by the integer division.
                    var forLast = dataList.Count - (id * count);
                    var range   = dataList.GetRange(count * id, id + 1 == partitions ? forLast : count);
                    Console.WriteLine("Start Thread " + id + $" List: {range.Count}");
                    // NOTE(review): dispatching the range is currently disabled; while it stays disabled nothing
                    // in this method marks rows as processed, so the same batch is fetched again on every pass.
                    //  taskList.Add(Task.Factory.StartNew(() => ProcessTranslateFiles(languageId.Value, range)));
                }

                Task.WaitAll(taskList.ToArray());
            }
        }
        // Example #5
        // 0
        /// <summary>
        /// Writes one <c>{word: translation}</c> JSON file per language into the <c>JsonFile</c> folder,
        /// skipping languages whose code already matches a file name (text before the first dot) there.
        /// </summary>
        /// <remarks>
        /// The file is produced with <c>JsonConvert</c> so quotes and backslashes are escaped; the earlier
        /// hand-built string was invalid JSON for such values. The output is therefore compact JSON.
        /// </remarks>
        private static void ConvertToJson()
        {
            using (var entity = new EnglishWordsEntities())
            {
                var path      = Directory.CreateDirectory(AppDomain.CurrentDomain.BaseDirectory + "\\JsonFile");
                var languages = entity.Languages.ToList();
                var imported  = new HashSet<string>(
                    Directory.GetFiles(path.FullName, "*.*")
                    .Select(x => x.Split('\\').LastOrDefault().Split('.').FirstOrDefault()));
                var needToExport = languages.Where(x => !imported.Contains(x.LanguageCode)).ToList();

                foreach (var lan in needToExport)
                {
                    var rows = (from f in entity.AllWordFromPaymons
                                join t in entity.WordTranslates on f.ID equals t.WordID
                                where f.IsPrimary == true && t.LanguageId == lan.ID
                                orderby f.Word
                                select new Result1
                                {
                                    Word = f.Word,
                                    Translated = t.Translated
                                }).ToList();

                    // Keep the first translation per word, in query order.
                    var translations = new Dictionary<string, string>();
                    foreach (var row in rows)
                    {
                        // Null values were written as empty strings before; keep that.
                        var key = row.Word ?? "";
                        if (!translations.ContainsKey(key))
                        {
                            translations.Add(key, row.Translated ?? "");
                        }
                    }

                    File.WriteAllText(path.FullName + "\\" + lan.LanguageCode + ".json",
                                      JsonConvert.SerializeObject(translations), Encoding.UTF8);
                    Console.WriteLine("Code " + lan.LanguageCode);
                }
            }
        }
        // Example #6
        // 0
        /// <summary>
        /// For the language with code "fa" (when no file for it exists yet in <c>JsonFileMerged2</c>), merges the
        /// <c>AllWords</c> text of its translations with its wiki translations per primary word and writes the
        /// result as a <c>{word: "a , b , c"}</c> JSON file.
        /// </summary>
        private static void ConvertToJson3()
        {
            // Characters that separate individual translations inside one text field.
            var separators = new[] { ',', '/', '\\', '\r', '\n', '|' };

            using (var entity = new EnglishWordsEntities())
            {
                var path = Directory.CreateDirectory(AppDomain.CurrentDomain.BaseDirectory + "\\JsonFileMerged2");

                var languages = entity.Languages.ToList();
                var imported  = Directory.GetFiles(path.FullName, "*.*")
                                .Select(x => x.Split('\\').LastOrDefault().Split('.').FirstOrDefault()).ToList();
                var needToExport = languages.Where(x => !imported.Contains(x.LanguageCode)).ToList();
                var allwords     = entity.AllWordFromPaymons.Where(x => x.IsPrimary == true).OrderBy(x => x.ID).ToList();

                foreach (var lan in needToExport.Where(x => x.LanguageCode == "fa"))
                {
                    // Index both sources by word id once, instead of scanning the full lists for every word.
                    var allWordsById = entity.WordTranslates
                                       .Where(x => x.LanguageId == lan.ID)
                                       .Select(x => new { x.WordID, x.AllWords })
                                       .Distinct()
                                       .ToList()
                                       .ToLookup(x => x.WordID, x => x.AllWords);
                    var wikiByWordId = entity.Wikis.Where(x => x.languageId == lan.ID).ToList().ToLookup(x => x.wordId);

                    var words = new Dictionary<string, string>();
                    foreach (var t in allwords)
                    {
                        var list = new List<string>();

                        // Only the first translation row of a word contributes, as before.
                        var allwords2 = allWordsById[t.ID].FirstOrDefault();
                        if (allwords2 != null)
                        {
                            list.AddRange(allwords2.Trim().Split(separators, StringSplitOptions.RemoveEmptyEntries));
                        }

                        foreach (var wiki1 in wikiByWordId[t.ID])
                        {
                            // A wiki row without text has nothing to contribute.
                            if (wiki1.Translated != null)
                            {
                                list.AddRange(wiki1.Translated.Trim().Split(separators, StringSplitOptions.RemoveEmptyEntries));
                            }
                        }

                        if (list.Count == 0)
                        {
                            continue;
                        }

                        var key = t.Word.ToLower();
                        var cleaned = list
                                      .Select(x => x.Trim().ToLower())
                                      .Where(x => x.Length > 0)
                                      .Distinct(new FlagComparer(lan.LanguageCode))
                                      .ToList();

                        // Whitespace-only entries can leave nothing behind (Aggregate used to throw here),
                        // and two words can share a lower-cased key (Add used to throw); the first one wins.
                        if (cleaned.Count > 0 && !words.ContainsKey(key))
                        {
                            words.Add(key, string.Join(" , ", cleaned));
                        }
                    }

                    File.WriteAllText(path.FullName + "\\" + lan.LanguageCode + ".json", JsonConvert.SerializeObject(words), Encoding.UTF8);
                    Console.WriteLine("Code " + lan.LanguageCode);
                }
            }
        }
        // Example #7
        // 0
        /// <summary>
        /// Reads the three joke files through <c>ImportFile</c> and bulk-inserts all of them in one call.
        /// </summary>
        public static void ImportJokeJson()
        {
            var list = new List<Joke>();

            list.AddRange(ImportFile("stupidstuff.json", JokeSourceEnum.StupidstuffJson));
            list.AddRange(ImportFile("reddit_jokes.json", JokeSourceEnum.RedditJokesJson));
            list.AddRange(ImportFile("wocka.json", JokeSourceEnum.WockaJson));

            // Dispose the context after the insert; it used to be leaked.
            using (var entity = new EnglishWordsEntities())
            {
                entity.BulkInsert<Joke>(list);
            }
        }
        /// <summary>
        /// For every root without a parent, looks its word up in the word-family JSON stored on the roots and,
        /// when the family names a parent that exists as a root, saves that parent's id on the root.
        /// </summary>
        public static void MakeWordFamiliy()
        {
            // The context stays open until every task that touches its entities has finished.
            using (var entity = new EnglishWordsEntities())
            {
                var allroot = entity.Roots.ToList();

                var wordFamilies = allroot
                                   .Where(x => !string.IsNullOrEmpty(x.VocabularyWordFamilyJson))
                                   .Select(x => JsonConvert.DeserializeObject<List<WordFamily>>(x.VocabularyWordFamilyJson))
                                   .Where(f => f != null)
                                   .SelectMany(f => f)
                                   .ToList();

                var distincted = wordFamilies.Select(x => new WordFamilySimple()
                {
                    parent = x.parent,
                    word   = x.word,
                }).Distinct().ToList();

                // Index once so each task does a keyed lookup instead of scanning both lists.
                // Taking the first element of a lookup group is the same first match as before.
                var familyByWord = distincted.ToLookup(x => x.word);
                var rootByWord   = allroot.ToLookup(x => x.Word);

                var taskList     = new List<Task>();
                var NeedToSearch = allroot.Where(x => x.Parent == null).ToList();

                foreach (var r in NeedToSearch)
                {
                    taskList.Add(Task.Factory.StartNew(() =>
                    {
                        var dis = familyByWord[r.Word].FirstOrDefault();
                        if (dis == null)
                        {
                            return;
                        }
                        var parent = rootByWord[dis.parent].FirstOrDefault();
                        if (parent == null)
                        {
                            return;
                        }
                        Console.WriteLine($"Found Parent {parent.Word}\t{r.Word}");
                        r.Parent = parent.ID;
                        using (var entity2 = new EnglishWordsEntities())
                        {
                            entity2.Roots.AddOrUpdate(r);
                            entity2.SaveChanges();
                        }
                    }));

                    // Run at most 21 tasks at a time.
                    if (taskList.Count > 20)
                    {
                        Task.WaitAll(taskList.ToArray());
                        taskList.Clear();
                    }
                }

                // Wait for the final partial batch; previously the method could return while
                // these saves were still running.
                Task.WaitAll(taskList.ToArray());
            }
        }
        /// <summary>
        /// Scans the downloaded VocabularyCom pages and, for each root that has no word-family JSON yet, stores
        /// the decoded <c>data</c> attribute of the page's <c>vcom:wordfamily</c> element on that root.
        /// </summary>
        /// <remarks>The root id is the file name with ".txt" removed; files with any other name are skipped.</remarks>
        public static void ExtractWordFamiliy()
        {
            var files = Directory.GetFiles(@"F:\Projects\Geeksltd\Netflix\ConsoleAppWordProcess\bin\Debug\VocabularyCom", "*.*");

            // A set gives constant-time "already done" checks.
            HashSet<int> idsHaveData;
            using (var entity2 = new EnglishWordsEntities())
            {
                idsHaveData = new HashSet<int>(entity2
                                               .Roots
                                               .Where(x => !string.IsNullOrEmpty(x.VocabularyWordFamilyJson))
                                               .Select(x => x.ID));
            }

            var listTask = new List<Task>();

            foreach (var filePath in files)
            {
                // Skip unrelated files instead of letting int.Parse abort the whole run.
                if (!int.TryParse(Path.GetFileName(filePath).Replace(".txt", ""), out var parsedId))
                {
                    continue;
                }
                if (idsHaveData.Contains(parsedId))
                {
                    continue;
                }

                var id = parsedId;
                listTask.Add(Task.Factory.StartNew(() =>
                {
                    var doc = new HtmlDocument();
                    doc.Load(filePath);

                    var wordFamilyNode = doc.DocumentNode.Descendants("vcom:wordfamily").FirstOrDefault();
                    if (wordFamilyNode == null)
                    {
                        return;
                    }

                    var jsonData   = System.Web.HttpUtility.HtmlDecode(wordFamilyNode.GetAttributeValue("data", ""));
                    var wordFamily = JsonConvert.DeserializeObject<List<WordFamily>>(jsonData);
                    // Deserialization yields null for an empty attribute.
                    if (wordFamily == null || wordFamily.Count == 0)
                    {
                        return;
                    }

                    using (var entity = new EnglishWordsEntities())
                    {
                        var word = entity.Roots.FirstOrDefault(x => x.ID == id);
                        if (word == null)
                        {
                            // A page file exists for an id that has no root row; nothing to update.
                            return;
                        }
                        word.VocabularyWordFamilyJson = jsonData;
                        entity.Roots.AddOrUpdate(word);
                        entity.SaveChanges();
                        Console.WriteLine("Saved " + word.Word + " count: " + wordFamily.Count);
                    }
                }));

                // Run at most 11 tasks at a time.
                if (listTask.Count > 10)
                {
                    Task.WaitAll(listTask.ToArray());
                    listTask.Clear();
                }
            }

            // Wait for the final partial batch, which was previously left running.
            Task.WaitAll(listTask.ToArray());
        }
        /// <summary>
        /// Exports every <c>RankAndCounts</c> row to <c>RankAndCounts.csv</c> (UTF-8) in the application base directory.
        /// </summary>
        public static void MakeCountFowWordSave()
        {
            string csv;

            // Dispose the context as soon as the data is in memory; it used to be leaked.
            using (var entity = new EnglishWordsEntities())
            {
                Console.WriteLine("Fething data....");
                var data = entity.RankAndCounts.ToList();

                Console.WriteLine("Convert To CSV....");
                csv = data.ToCsv();
            }

            Console.WriteLine("Savging to file....");
            File.WriteAllText(AppDomain.CurrentDomain.BaseDirectory + "\\RankAndCounts.csv", csv, Encoding.UTF8);
        }
        // Example #11
        // 0
        /// <summary>
        /// Exports the rows returned by <c>GetJokeBest</c> to <c>joke_has_score.csv</c> (UTF-8) in the
        /// application base directory.
        /// </summary>
        public static void ExportCSV()
        {
            string csv;

            // Dispose the context as soon as the data is in memory; it used to be leaked.
            using (var entity = new EnglishWordsEntities())
            {
                Console.WriteLine("Fething data....");
                var data = entity.GetJokeBest().ToList();

                Console.WriteLine("Convert To CSV....");
                csv = data.ToCsv();
            }

            Console.WriteLine("Savging to file....");
            File.WriteAllText(AppDomain.CurrentDomain.BaseDirectory + "\\joke_has_score.csv", csv, Encoding.UTF8);
        }
        // Example #12
        // 0
        /// <summary>
        /// Reads <c>Import_Word.txt</c> from the application base directory, splits it on commas, double quotes
        /// and whitespace, and bulk-inserts each token as an <c>AllWordFromPaymon</c> row.
        /// </summary>
        public static void ImportWords()
        {
            var wordText = File.ReadAllText(AppDomain.CurrentDomain.BaseDirectory + "\\Import_Word.txt");
            var words    = wordText.Split(new char[] { ',', '\"', ' ', '\r', '\n', '\t' }, StringSplitOptions.RemoveEmptyEntries);
            var lists    = words
                           .Select(x => new AllWordFromPaymon { Word = x.Trim() })
                           .Where(x => !string.IsNullOrEmpty(x.Word))
                           .ToList();

            Console.WriteLine("Intering..");

            // Dispose the context after the insert; it used to be leaked.
            using (var entity = new EnglishWordsEntities())
            {
                entity.BulkInsert(lists);
            }
            Console.WriteLine("Inserted");
        }
        /// <summary>
        /// Counts how often each normalised word occurs in <c>D:\temp\txt\txt.txt</c> and stores that count on
        /// the root with the same word, when one exists.
        /// </summary>
        public static void MakeCount()
        {
            var groupedWord = File.ReadAllLines(@"D:\temp\txt\txt.txt")
                              .SelectMany(line => NormalString(line).Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
                              .GroupBy(x => x)
                              .ToDictionary(kvp => kvp.Key, kvp => kvp.Count());

            // The context stays open until every task that touches its entities has finished.
            using (var entity = new EnglishWordsEntities())
            {
                // Index by word once so each task does a keyed lookup instead of a list scan.
                var rootsByWord = entity.Roots.ToList().ToLookup(x => x.Word);
                var taskList    = new List<Task>();

                foreach (var word in groupedWord)
                {
                    taskList.Add(Task.Factory.StartNew(() =>
                    {
                        var w = rootsByWord[word.Key].FirstOrDefault();
                        if (w == null)
                        {
                            return;
                        }

                        w.Count = word.Value;
                        using (var entity2 = new EnglishWordsEntities())
                        {
                            entity2.Roots.AddOrUpdate(w);
                            entity2.SaveChanges();
                        }
                        Console.WriteLine($"{w.Word}\t{word.Value}");
                    }));

                    // Run at most 21 tasks at a time.
                    if (taskList.Count > 20)
                    {
                        Task.WaitAll(taskList.ToArray());
                        taskList.Clear();
                    }
                }

                // Wait for the final partial batch, which was previously left running.
                Task.WaitAll(taskList.ToArray());
            }
        }
        /// <summary>
        /// Reads <c>D:\temp\txt\words_Rank.csv</c>; on each line the first token is a rank and the remaining tokens
        /// are words. Every root whose word appears on the line gets that rank in <c>RankFromPaymon</c>.
        /// </summary>
        /// <remarks>Lines whose first token is not an integer are ignored.</remarks>
        public static void GetWordRank()
        {
            var allWords = File.ReadAllLines(@"D:\temp\txt\words_Rank.csv");
            var taskList = new List<Task>();

            foreach (var allWord in allWords)
            {
                taskList.Add(Task.Factory.StartNew(() =>
                {
                    var splited = allWord.Split(new[] { ',', ' ', '"' }, StringSplitOptions.RemoveEmptyEntries);
                    if (splited.Length == 0 || !int.TryParse(splited[0], out var id))
                    {
                        return;
                    }

                    // Everything after the rank token is a word.
                    var listWord = splited.Skip(1).ToList();

                    using (var entity = new EnglishWordsEntities())
                    {
                        var roots = entity.Roots.Where(x => listWord.Contains(x.Word)).ToList();
                        foreach (var root in roots)
                        {
                            root.RankFromPaymon = id;
                            entity.Roots.AddOrUpdate(root);
                        }
                        entity.SaveChanges();
                    }
                    Console.WriteLine("Saved Words ... " + allWord);
                }));

                // Run at most 11 tasks at a time.
                if (taskList.Count > 10)
                {
                    Task.WaitAll(taskList.ToArray());
                    taskList.Clear();
                }
            }

            // Always wait for the final partial batch. The old "Count > 10" guard could never be
            // true here, because the loop clears the list as soon as it exceeds ten.
            Task.WaitAll(taskList.ToArray());
            taskList.Clear();
        }
        /// <summary>
        /// For every <c>RankAndCounts</c> row without <c>NumberTimes</c>, sums the <c>Count</c> of all roots whose
        /// lower-cased, trimmed word equals the row's word or one of its comma-separated other forms, and saves
        /// that sum on the row.
        /// </summary>
        public static void MakeCountFowWord()
        {
            using (var entity = new EnglishWordsEntities())
            {
                Console.WriteLine($"Fetching Roots");
                var allroot = entity.Roots.Select(x => new
                {
                    x.ID,
                    Word = x.Word.ToLower().Trim(),
                    x.Count
                }).ToList();

                // Index the counts by word once, instead of scanning every root for every row.
                var countsByWord = allroot.ToLookup(x => x.Word, x => x.Count);

                Console.WriteLine($"Fetching RankAndCounts");
                var paymonWord = entity.RankAndCounts.Where(x => x.NumberTimes == null).ToList();
                var taskList   = new List<Task>();

                foreach (var word in paymonWord)
                {
                    taskList.Add(Task.Factory.StartNew(() =>
                    {
                        // A missing Word or OtherForms is treated as empty instead of throwing.
                        var rowWord =
                            ((word.Word ?? "").Trim().ToLower() + "," + (word.OtherForms ?? "").Trim().ToLower())
                            .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
                            .Select(x => x.Trim().ToLower())
                            .ToList();

                        // Distinct keeps a form listed twice from counting its roots twice.
                        var sum = rowWord.Distinct().SelectMany(w => countsByWord[w]).DefaultIfEmpty(0).Sum();
                        word.NumberTimes = sum;
                        using (var entity2 = new EnglishWordsEntities())
                        {
                            entity2.RankAndCounts.AddOrUpdate(word);
                            entity2.SaveChanges();
                        }
                        // string.Join also copes with an empty list, where Aggregate threw.
                        Console.WriteLine($"{word.Rank}\t{string.Join(",", rowWord)}\t{sum}");
                    }));

                    // Run at most 21 tasks at a time.
                    if (taskList.Count > 20)
                    {
                        Task.WaitAll(taskList.ToArray());
                        taskList.Clear();
                    }
                }

                // Wait for the final partial batch, which was previously left running.
                Task.WaitAll(taskList.ToArray());
            }
        }
        /// <summary>
        /// Runs <c>FindAllDependency</c> for every ungrouped, parentless root that has a count, then turns each
        /// entry of <c>family</c> into a CSV-like line (root word, quoted member words, total count) and writes the
        /// lines, ordered by descending count, to <c>WordFamily_NotGrouped.txt</c>.
        /// </summary>
        public static void MakeWordFamiliyFromDataBase()
        {
            using (var entity = new EnglishWordsEntities())
            {
                var allroot = entity.Roots.ToList();
                var roots   = allroot.Where(x => x.Parent == null && x.Count != null && x.Grouped == false).OrderBy(c => c.Word).ToList();

                var taskList = new List<Task>();

                foreach (var r in roots)
                {
                    taskList.Add(Task.Factory.StartNew(() =>
                    {
                        FindAllDependency(allroot, r.ID, r.ID, 0);
                        Console.WriteLine($"Grouped {r.Word} {family[r.ID].Count}");
                    }));
                    if (taskList.Count > 10)
                    {
                        Task.WaitAll(taskList.ToArray());
                        taskList.Clear();
                    }
                }

                // Finish the grouping phase completely before reading `family`. Previously the last
                // grouping tasks could still be running while the loop below enumerated it.
                Task.WaitAll(taskList.ToArray());
                taskList.Clear();

                foreach (var f in family)
                {
                    taskList.Add(Task.Factory.StartNew(() =>
                    {
                        var root = allroot.FirstOrDefault(x => x.ID == f.Key);
                        if (root == null)
                        {
                            return;
                        }

                        var currentSb = new StringBuilder();
                        var count     = root.Count ?? 0;

                        currentSb.Append(root.Word);
                        currentSb.Append(",");

                        if (f.Value != null && f.Value.Count > 0)
                        {
                            var words = allroot.Where(x => f.Value.Contains(x.ID)).OrderBy(x => x.Word).ToList();
                            count    += words.Sum(x => x.Count ?? 0);

                            // string.Join also copes with an empty match list, where Aggregate threw.
                            var familiy = string.Join(",", words.Select(x => x.Word)).Trim(',');
                            currentSb.Append($"\"{familiy}\"");
                        }

                        currentSb.Append(",");
                        currentSb.Append(count);
                        Console.WriteLine(currentSb);
                        var model = new FamilyResult()
                        {
                            Count = count,
                            SVC   = currentSb.ToString().Replace("\r", "").Replace("\n", "")
                        };
                        FamilyGrouped.TryAdd(f.Key, model);

                        Console.WriteLine($"FamilyGrouped {f.Key} {model.SVC}");
                    }));
                    if (taskList.Count > 10)
                    {
                        Task.WaitAll(taskList.ToArray());
                        taskList.Clear();
                    }
                }
                Task.WaitAll(taskList.ToArray());

                Console.WriteLine("Aggrigating...");
                try
                {
                    // One Join instead of pairwise concatenation, which was quadratic and threw on an empty result.
                    var ordered = string.Join("\r\n", FamilyGrouped.Values.OrderByDescending(x => x.Count).Select(x => x.SVC));
                    Console.WriteLine("Aggrigatined");
                    Console.WriteLine("Writing to file...");
                    File.WriteAllText(AppDomain.CurrentDomain.BaseDirectory + "\\WordFamily_NotGrouped.txt", ordered, Encoding.UTF8);
                    Console.WriteLine("Wroted file");
                }
                catch (Exception e)
                {
                    Console.WriteLine(e);
                }
            }
        }
        /// <summary>
        /// Scans the downloaded DictionaryCom pages of roots that have neither a parent nor a
        /// <c>DictionaryWordFamily</c> and tries to derive a parent for each root from its page.
        /// </summary>
        /// <remarks>
        /// Per page: if there is more than one <c>h1</c>, the root id is appended to <c>h1.txt</c>. Otherwise the
        /// candidate parent word is the <c>h1</c> text when it differs from the root's word, else the last (in
        /// descending string order) <c>a.luna-xref</c> link text inside the parent of the <c>wordOrigin</c> heading.
        /// When the candidate exists as a root, its id is saved as the parent. Finally, if the page has a
        /// <c>vcom:wordfamily</c> element, its decoded <c>data</c> attribute is saved as the root's family JSON.
        /// The root id is the file name with ".txt" removed; files with any other name are skipped.
        /// </remarks>
        public static void ExtractWordFamiliyFromDictionary()
        {
            var files = Directory.GetFiles(@"F:\Projects\Geeksltd\Netflix\ConsoleAppWordProcess\bin\Debug\DictionaryCom", "*.*");

            using (var entity2 = new EnglishWordsEntities())
            {
                var allword = entity2.Roots.ToList();

                // Keyed views of the roots, so the loop does not rescan the full list for every file.
                var wordsById   = allword.ToLookup(x => x.ID);
                var wordsByText = allword.ToLookup(x => x.Word);

                var idsHaveData = new HashSet<int>(allword
                                                   .Where(x => !string.IsNullOrEmpty(x.DictionaryWordFamily) || x.Parent != null)
                                                   .Select(x => x.ID));

                foreach (var filePath in files)
                {
                    // Skip unrelated files instead of letting int.Parse abort the whole run.
                    if (!int.TryParse(Path.GetFileName(filePath).Replace(".txt", ""), out var parsedId))
                    {
                        continue;
                    }
                    if (idsHaveData.Contains(parsedId))
                    {
                        continue;
                    }

                    var fileId = parsedId;

                    // Decide whether to skip before loading the page. A file without a matching root
                    // used to crash on the Parent check below.
                    var currentWord = wordsById[fileId].FirstOrDefault();
                    if (currentWord == null || currentWord.Parent.HasValue)
                    {
                        continue;
                    }

                    var doc = new HtmlDocument();
                    doc.Load(filePath);

                    var h1Elements = doc.DocumentNode.Descendants("h1").ToList();
                    if (h1Elements.Count > 1)
                    {
                        File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + "\\h1.txt", "\r\n" + currentWord.ID);
                        Console.WriteLine("File Have more than H1 " + currentWord.Word);
                    }
                    else
                    {
                        // A page without any h1 yields null here and falls through to the origin section.
                        var h1 = h1Elements.FirstOrDefault()?.InnerText;

                        string lowered = null;
                        if (h1 != null && h1.ToLower() != currentWord.Word.ToLower())
                        {
                            lowered = h1.ToLower();
                        }
                        else
                        {
                            var section = doc.DocumentNode.SelectSingleNode("//h2[@id='wordOrigin']");
                            if (section != null && section.ParentNode != null)
                            {
                                var origin = section.ParentNode.Descendants()
                                             .Where(x => x.Name == "a" && x.GetAttributeValue("class", "") == "luna-xref")
                                             .Select(x => x.InnerText)
                                             .OrderByDescending(x => x)
                                             .FirstOrDefault();

                                // Compare case-insensitively, as the h1 branch does.
                                if (origin != null && origin.ToLower() != currentWord.Word.ToLower())
                                {
                                    lowered = origin.ToLower();
                                }
                            }
                        }

                        if (lowered != null)
                        {
                            var parent = wordsByText[lowered].FirstOrDefault();
                            if (parent != null)
                            {
                                currentWord.Parent = parent.ID;
                                currentWord.DictionaryWordFamily = lowered;
                                using (var entity = new EnglishWordsEntities())
                                {
                                    entity.Roots.AddOrUpdate(currentWord);
                                    entity.SaveChanges();
                                }

                                Console.WriteLine($"Found Origin  {lowered}\t{currentWord.Word}");
                            }
                        }
                    }

                    var wordFamilyNode = doc.DocumentNode.Descendants("vcom:wordfamily").FirstOrDefault();
                    if (wordFamilyNode == null)
                    {
                        continue;
                    }

                    var jsonData   = System.Web.HttpUtility.HtmlDecode(wordFamilyNode.GetAttributeValue("data", ""));
                    var wordFamily = JsonConvert.DeserializeObject<List<WordFamily>>(jsonData);
                    // Deserialization yields null for an empty attribute.
                    if (wordFamily == null || wordFamily.Count == 0)
                    {
                        continue;
                    }

                    using (var entity = new EnglishWordsEntities())
                    {
                        var word = entity.Roots.FirstOrDefault(x => x.ID == fileId);
                        if (word == null)
                        {
                            continue;
                        }
                        word.VocabularyWordFamilyJson = jsonData;
                        entity.Roots.AddOrUpdate(word);
                        entity.SaveChanges();
                        Console.WriteLine("Saved " + word.Word + " count: " + wordFamily.Count);
                    }
                }
            }
        }
        /// <summary>
        /// Repeatedly takes up to ten <c>Roots</c> rows whose <c>VocabularyCom</c> flag is
        /// still null, fetches each word from vocabulary.com on its own task via
        /// <c>GetDataVocabulary</c>, and saves the returned row back to the database.
        /// Returns once a batch query yields no rows.
        /// </summary>
        /// <remarks>
        /// A 403 response resets <c>VocabularyCom</c> to null before saving, so the row is
        /// picked up again by a later batch. A task that throws only logs the exception,
        /// which likewise leaves the row eligible for the next batch.
        /// </remarks>
        /// <param name="obj">Not used by this method.</param>
        public static void StartDownloadVoabulary(object obj)
        {
            var vocabularyDir = AppDomain.CurrentDomain.BaseDirectory + "\\VocabularyCom\\";

            Directory.CreateDirectory(vocabularyDir);

            while (true)
            {
                // One context per batch. The using block disposes it on every exit path,
                // including the final "no rows left" break, and keeps it alive until all
                // tasks of the batch have finished (same lifetime as before).
                using (var entity = new EnglishWordsEntities())
                {
                    var rows = entity.Roots
                               .Where(x => x.VocabularyCom == null)
                               .Take(10)
                               .ToList();
                    if (rows.Count == 0)
                    {
                        break;
                    }

                    var taskList = new List<Task>();
                    foreach (var row in rows)
                    {
                        taskList.Add(Task.Factory.StartNew(() =>
                        {
                            try
                            {
                                // .Result blocks until the download task has completed.
                                var result = GetDataVocabulary(row, vocabularyDir + row.ID + ".txt",
                                                               "https://www.vocabulary.com/dictionary/" + row.Word).Result;
                                result.CreateDate = DateTime.Now;
                                if (result.VocabularyComStatusCode == 403)
                                {
                                    // Forbidden: clear the flag so the row is selected again later.
                                    result.VocabularyCom = null;
                                }

                                // Each task saves through its own short-lived context.
                                using (var entity2 = new EnglishWordsEntities())
                                {
                                    entity2.Roots.AddOrUpdate(result);
                                    entity2.SaveChanges();
                                }

                                if (result.VocabularyComStatusCode == 200)
                                {
                                    if (result.VocabularyCom == true)
                                    {
                                        Console.ForegroundColor = ConsoleColor.Green;
                                        Console.WriteLine($"{DateTime.Now} Vocabulary Found ... " + row.Word);
                                    }
                                    else
                                    {
                                        Console.ForegroundColor = ConsoleColor.Yellow;
                                        Console.WriteLine($"{DateTime.Now} Vocabulary Not Found ... " + row.Word);
                                    }
                                }
                                else if (result.VocabularyComStatusCode == 403)
                                {
                                    Console.ForegroundColor = ConsoleColor.Red;
                                    Console.WriteLine($"{DateTime.Now} Vocabulary Forrbiden ... " + row.Word);
                                }
                                else
                                {
                                    Console.ForegroundColor = ConsoleColor.Magenta;
                                    Console.WriteLine($"{DateTime.Now} Vocabulary {result.VocabularyComStatusCode ?? 0}... " + row.Word);
                                }
                            }
                            catch (Exception e)
                            {
                                // Best effort: report the failure and let the other tasks continue.
                                Console.ForegroundColor = ConsoleColor.Red;
                                Console.WriteLine(e);
                            }
                        }));
                    }

                    Task.WaitAll(taskList.ToArray());
                }

                // Retained from the original: force a collection between batches.
                GC.Collect();
            }
        }
예제 #19
0
        /// <summary>
        /// Parses the stored JSON of every row in <paramref name="allData"/>, picks the
        /// primary translation, collects the alternative translations, and writes the
        /// result to the matching <c>WordTranslates</c> row, marking it as processed.
        /// </summary>
        /// <remarks>
        /// Rows for which no translation at all can be extracted are still marked as
        /// processed, with an empty <c>AllWords</c> value.
        /// </remarks>
        /// <param name="languageId">Not used by this method; each row carries its own <c>LanId</c>.</param>
        /// <param name="allData">Rows to process; <c>First</c> and <c>Verified</c> are updated in place.</param>
        private static void ProcessTranslateFiles(int languageId, List<Result1> allData)
        {
            using (var entity = new EnglishWordsEntities())
            {
                entity.Database.CommandTimeout = int.MaxValue;
                Console.WriteLine("Processing...");

                foreach (var result1 in allData)
                {
                    var dicAll = new List<string>();

                    // AllData wraps the raw response; Raw itself is a JSON array.
                    var objectT = JsonConvert.DeserializeObject<CallBankService>(result1.AllData);
                    var body    = JsonConvert.DeserializeObject<JArray>(objectT.Raw);

                    if (body.Count > 0)
                    {
                        // First entry of the first element: [0] is the translated text and
                        // [4] == 1 is treated as "verified".
                        var first = body.First<JToken>();
                        var item  = first.FirstOrDefault();
                        if (item != null)
                        {
                            result1.Verified = item[4].Value<int>() == 1;
                            result1.First    = item[0].Value<string>();
                        }
                    }

                    // For unverified rows, prefer the best-ranked alternative from element 5.
                    if (body.Count > 5 && body[5].HasValues && !result1.Verified)
                    {
                        var item = body[5].FirstOrDefault();
                        if (item != null && item[3].HasValues && item[2].HasValues && item[3].Any())
                        {
                            var googleWord = item[0].Value<string>().ToLower();
                            var myword     = result1.Word.ToLower();
                            if (googleWord != myword)
                            {
                                // Record words where the response's source word differs from ours.
                                File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + "\\DeffWords.txt",
                                                   "\r\n" + myword + "\t" + googleWord);
                            }

                            // Position of the highest rank in item[3][0].
                            var goodRank = item[3][0].Values<int>()
                                           .Select((r, i) => new { Index = i, Rank = r })
                                           .OrderByDescending(x => x.Rank)
                                           .FirstOrDefault();
                            if (goodRank != null)
                            {
                                if (item[2].Count() > goodRank.Index)
                                {
                                    result1.First = item[2][goodRank.Index][0].Value<string>();
                                }
                                else
                                {
                                    // Rank index out of range: fall back to the first alternative.
                                    result1.First = item[2].FirstOrDefault()?[0].Value<string>();
                                }
                            }
                        }
                    }

                    if (!string.IsNullOrEmpty(result1.First))
                    {
                        dicAll.Add(result1.First.Trim());
                    }

                    // Element 1 holds groups whose second entry is a list of translations.
                    if (body.Count > 1 && body[1].HasValues)
                    {
                        foreach (var v in body[1])
                        {
                            if (v.HasValues && v.Count() > 1)
                            {
                                dicAll.AddRange(v[1].Select(x => x.Value<string>()));
                            }
                        }
                    }

                    // string.Join yields "" for an empty list and treats null entries as empty,
                    // whereas the seedless Aggregate used previously threw
                    // InvalidOperationException on an empty list and aborted the whole batch.
                    var processedWords = string.Join(", ", dicAll.Distinct()).Trim(' ', ',');

                    entity.WordTranslates.Where(x => x.WordID == result1.WordId && x.LanguageId == result1.LanId)
                    .UpdateFromQuery(x => new WordTranslate {
                        AllWords = processedWords, Translated = result1.First, Proccessed = true
                    });

                    Console.WriteLine($"Update {result1.WordId}\t{result1.LanId}\t{result1.Word}");
                }
            }
        }
예제 #20
0
 /// <summary>
 /// Initializes the service by assigning a new <c>EnglishWordsEntities</c> context to <c>entity</c>.
 /// </summary>
 public ResourceService() => entity = new EnglishWordsEntities();
        /// <summary>
        /// Find Word Combinations: reads the subtitle text file, splits every line into
        /// normalized words, hands the lines to ten parallel <c>CalculateCombinations</c>
        /// tasks and finally groups <c>wordsCombinations</c> by value.
        /// </summary>
        /// <remarks>
        /// Ranking and bulk-inserting the grouped result is currently disabled (kept as
        /// commented-out code), so only the progress messages are produced after grouping.
        /// Any exception is printed to the console in red and not rethrown.
        /// </remarks>
        public static void CalculateWordCombinations()
        {
            try
            {
                Console.ForegroundColor = ConsoleColor.White;
                Console.WriteLine("Preparing read subtitle");
                var processWords = new List<List<string>>();
                foreach (var line in File.ReadAllLines(@"D:\temp\txt\txt.txt"))
                {
                    var words = NormalString(line).Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                    // Lines with fewer than two words are skipped.
                    if (words.Length > 1)
                    {
                        processWords.Add(words.ToList());
                    }
                }

                Console.WriteLine($"All Lines {processWords.Count}");
                Console.WriteLine("Preparing processWords");

                var threadCount = 10;
                var count       = processWords.Count / threadCount;
                var taskList    = new List<Task>();

                for (var indexTh = 0; indexTh < threadCount; indexTh++)
                {
                    var start = count * indexTh;
                    // The last chunk also takes the remainder of the integer division.
                    var length = indexTh + 1 == threadCount ? processWords.Count - start : count;
                    var range  = processWords.GetRange(start, length);
                    Console.WriteLine("Start Thread " + indexTh + " count of lines " + range.Count);
                    // Copy the loop variable so each task captures its own index.
                    var th = indexTh;
                    taskList.Add(Task.Factory.StartNew(() => CalculateCombinations(range, th)));
                }

                Task.WaitAll(taskList.ToArray());
                Console.WriteLine("Task Finished");

                Console.WriteLine("Grouping...");
                var dic = wordsCombinations.GroupBy(x => x).ToDictionary(kvp => kvp.Key, kvp => kvp.Count());
                Console.WriteLine("Grouped");

                Console.WriteLine("Ordering...");
                //var needToAdd = dic.Select(x => new WordCombination
                //{
                //    Word1 = x.Key.Split(' ')[0],
                //    Word2 = x.Key.Split(' ')[1],
                //    Count = x.Value

                //}).ToList().OrderByDescending(x => x.Count).Select((r, i) => new WordCombination()
                //{
                //    Word1 = r.Word1,
                //    Word2 = r.Word2,
                //    Count = r.Count,
                //    Rank = i + 1
                //}).ToList();

                Console.WriteLine("Ordered");

                Console.WriteLine("Inserting...");
                // The insert is disabled, so no database context is created here. When
                // re-enabling it, create the context in a using block:
                // using (var entity = new EnglishWordsEntities()) { entity.BulkInsert(needToAdd); }
                Console.WriteLine("Inserted");
            }
            catch (Exception e)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine(e);
            }
        }