Beispiel #1
0
 /// <summary>
 /// Imports the words returned by <c>GetWordsCollectionFromJson</c> into the database,
 /// inserting only those whose English name is not stored yet.
 /// </summary>
 /// <remarks>
 /// A word is identified by its English name only. A name that occurs more than once in
 /// the JSON batch is inserted once; later occurrences are reported as skipped.
 /// Every inserted word starts in the four-day learning phase and is due today.
 /// </remarks>
 public void UpdateDictionary()
 {
     VmCurrentWord[] words = GetWordsCollectionFromJson();
     using (var db = new WordContext())
     {
         // Seed the lookup with the English names already stored, so each membership
         // test is a hash lookup instead of a scan over every stored word.
         var knownNames = new HashSet<string>(db.Words.Select(stored => stored.Name_en));

         foreach (VmCurrentWord word in words)
         {
             // HashSet.Add returns false when the name is already known: either stored
             // in the database or seen earlier in this same batch.
             if (knownNames.Add(word.Name_en))
             {
                 db.Words.Add(new VmWord
                 {
                     Name_ru                 = word.Name_ru,
                     Name_en                 = word.Name_en,
                     FourDaysLearnPhase      = true,
                     LearnDay                = 0,
                     RepeatIterationNum      = 0,
                     NextRepeatDate          = DateTime.Today,
                     DailyReapeatCountForEng = 0,
                     DailyReapeatCountForRus = 0
                 });
                 Console.WriteLine("Updating word \"{0}\"", word.Name_en);
             }
             else
             {
                 Console.WriteLine("Skipped word \"{0}\"", word.Name_en);
             }
         }

         var count = db.SaveChanges();
         Console.WriteLine("{0} records saved to database", count);
     }
 }
 //public string Update([FromBody] VmWord[] words)
 // TODO: use better name for VmWordAndCollocationUpdating
 /// <summary>
 /// Persists the words and collocations sent by the client.
 /// </summary>
 /// <param name="wordAndCollocationUpdating">
 /// Request body carrying the words and the collocations to update.
 /// </param>
 /// <returns>The literal string <c>"succes"</c> once all changes are saved.</returns>
 /// <exception cref="ArgumentNullException">
 /// The request body is null, or one of its words or collocations is null.
 /// Nothing is saved in that case because <c>SaveChanges</c> is not reached.
 /// </exception>
 public string Update([FromBody] VmWordAndCollocationUpdating wordAndCollocationUpdating)
 {
     if (wordAndCollocationUpdating == null)
     {
         throw new ArgumentNullException(nameof(wordAndCollocationUpdating));
     }

     using (var db = new WordContext())
     {
         foreach (VmWord word in wordAndCollocationUpdating.words)
         {
             if (word == null)
             {
                 Console.WriteLine("Word is null");
                 // Two-argument overload: the single-string constructor would treat
                 // the text as the parameter name rather than the message.
                 throw new ArgumentNullException(nameof(wordAndCollocationUpdating), "Word is null");
             }

             db.Words.Update(word);
             Console.WriteLine("Updating word \"{0}\" id {1}", word.Name_en, word.Id);
         }

         DateTime dateToday = DateTime.Now;
         int      collocationDelayPeriod = 4;

         foreach (VmCollocation collocation in wordAndCollocationUpdating.collocations)
         {
             if (collocation == null)
             {
                 Console.WriteLine("collocation is null");
                 throw new ArgumentNullException(nameof(wordAndCollocationUpdating), "collocation is null");
             }

             // A collocation that arrives with NotUsedToday == false is pushed
             // collocationDelayPeriod days ahead and the flag is set back to true.
             if (collocation.NotUsedToday == false)
             {
                 collocation.NextRepeatDate = dateToday.AddDays(collocationDelayPeriod);
                 collocation.NotUsedToday   = true;
             }

             db.Collocations.Update(collocation);
             Console.WriteLine("Updating collocation \"{0}\"", collocation.AudioUrl);
         }

         db.SaveChanges();
     }
     return("succes");
 }
        /// <summary>
        /// Logs every stored word without a space in its Russian name for which
        /// "&lt;audioPath&gt;/&lt;Name_ru&gt;.wav" is reported missing by <c>FileChecker</c>.
        /// </summary>
        /// <returns>Always <c>null</c>.</returns>
        public string CheckAudio()
        {
            VmWord[] singleWordEntries;

            using (var context = new WordContext())
            {
                singleWordEntries = context.Words
                                    .Where(entry => entry.Name_ru.IndexOf(' ') < 0)
                                    .ToArray();
            }

            var checker = new FileChecker();

            foreach (VmWord entry in singleWordEntries)
            {
                string wavPath = Path.Combine(audioPath, entry.Name_ru) + ".wav";
                if (checker.CheckIfExist(wavPath))
                {
                    continue;
                }

                // A missing file is only reported; it does not abort the scan.
                Console.WriteLine("File doesn't exist, path: {0}", wavPath);
            }

            return null;
        }
        /// <summary>
        /// Cleans up duplicate words and then re-schedules every word whose repeat date has
        /// arrived and whose daily repeat counters reached the required minimum.
        /// </summary>
        /// <remarks>
        /// Steps, in order: (1) mark exact duplicates (same Name_ru and Name_en) for removal,
        /// (2) throw if conflicting entries remain that need manual resolution,
        /// (3) move due words to their next repeat date, (4) save everything in one
        /// <c>SaveChanges</c> call. Because the throw in step 2 happens before
        /// <c>SaveChanges</c>, the removals from step 1 are not persisted in that case.
        /// </remarks>
        /// <exception cref="Exception">Conflicting word entries were found.</exception>
        public void UpdateSchedule()
        {
            DateTime dateToday = DateTime.Now;

            VmWord[] renewingIteration;
            using (var db = new WordContext())
            {
                // TODO: move it to separated methods
                // Auto-removing duplicates
                // In-memory snapshot of every word; the queries below count matches against it.
                var allWords   = db.Words.ToArray();
                // NOTE(review): this filters db.Words with a predicate that references the
                // in-memory allWords array. Whether the provider translates it or evaluates it
                // on the client is not visible from here; confirm against the EF version in use.
                var duplicates = db.Words.Where(x => allWords
                                                .Count(n => ((n.Name_ru == x.Name_ru) &&
                                                             (n.Name_en == x.Name_en))) > 1)
                                 .GroupBy(p => p.Name_ru)
                                 .Select(p => p.LastOrDefault());

                // TODO: delete all duplicates at once;
                // for now p.Skip(1) does not work here.
                // As written, only the last entry of each Name_ru group is selected, so a word
                // stored three or more times loses one copy per call.

                foreach (VmWord word in duplicates)
                {
                    db.Words.Remove(word);
                    Console.WriteLine("Removing duplicate \"{0}\" id {1}", word.Name_en, word.Id);
                }

                // Manually-removing duplicates
                // A word qualifies when more than one snapshot entry shares exactly one of its
                // two names (same Name_en with different Name_ru, or the reverse).
                // NOTE(review): allWords still contains the entries marked for removal above,
                // so they are counted here as well.
                var duplicatesToResolve = db.Words.Where(x => allWords
                                                         .Count(n => ((n.Name_ru != x.Name_ru) && (n.Name_en == x.Name_en)) ||
                                                                ((n.Name_ru == x.Name_ru) && (n.Name_en != x.Name_en))) > 1)
                                          .GroupBy(p => p.Name_ru)
                                          .Select(p => p.LastOrDefault());

                if (duplicatesToResolve.Count() > 0)
                {
                    throw new Exception("There are duplicates thet should be resolved");
                }

                // Renewing Schedule
                // Due words: repeat date reached, both daily counters at or above
                // minReapeatCountPerDayIteration, and no longer in the four-day learn phase.
                renewingIteration = db.Words.Where(p => (p.NextRepeatDate <= dateToday) &&
                                                   (p.DailyReapeatCountForEng >= minReapeatCountPerDayIteration) &&
                                                   (p.DailyReapeatCountForRus >= minReapeatCountPerDayIteration) &&
                                                   (p.FourDaysLearnPhase == false)).ToArray();

                var iterationIncrement = 7;
                foreach (VmWord word in renewingIteration)
                {
                    // Delay in days = 7 * getIterationLenght(current iteration number);
                    // getIterationLenght is defined outside this block.
                    var iteration = iterationIncrement * getIterationLenght(word.RepeatIterationNum);

                    word.NextRepeatDate          = dateToday.AddDays(iteration);
                    word.DailyReapeatCountForEng = 0;
                    word.DailyReapeatCountForRus = 0;

                    word.RepeatIterationNum++;

                    db.Words.Update(word);
                    Console.WriteLine("Set new day for repeating word \"{0}\" iterations {1} id {2}",
                                      word.Name_en, word.RepeatIterationNum, word.Id);
                }
                // Single commit for both the duplicate removals and the schedule changes.
                db.SaveChanges();
            }
        }
        /// <summary>
        /// Builds the list of single-word entries that are due for repetition, each with its
        /// English and Russian dictor recordings and the collocations matching its English name.
        /// </summary>
        /// <remarks>
        /// <para>Calls <c>UpdateSchedule</c> first, so any exception it throws propagates.</para>
        /// <para>
        /// Words are processed in ascending <c>RepeatIterationNum</c> order. A word without
        /// Russian or without English dictors is logged and left out of the result.
        /// </para>
        /// <para>
        /// When the <c>DailyRepeatAmount</c> setting is non-null and non-zero, it caps the
        /// result: each included word costs twice the per-day minimum repeat count of its
        /// phase, and processing stops once the accumulated cost reaches the setting.
        /// </para>
        /// </remarks>
        /// <returns>The words to repeat, in processing order.</returns>
        public async Task <List <VmWordWithDictors> > GetWords()
        {
            VmWord[]             words;
            List <VmCollocation> collocations;
            DateTime             dateToday = DateTime.Now;
            int dailyRepeatAmount;

            UpdateSchedule();

            using (var db = new WordContext())
            {
                // Read the settings row once; a null setting means "no daily limit" (0).
                int? configuredAmount = db.Settings.First().DailyRepeatAmount;
                dailyRepeatAmount = configuredAmount ?? 0;

                words = db.Words.Where(p => (p.Name_ru.IndexOf(' ') < 0) &&
                                       (p.Name_en.IndexOf(' ') < 0) &&
                                       (p.NextRepeatDate <= dateToday))
                        .OrderBy(p => p.RepeatIterationNum).ToArray();

                collocations = db.Collocations.Where(p => p.NextRepeatDate <= dateToday).ToList();
            }

            var wordsWithDictors = new List <VmWordWithDictors>();
            int repeatCount      = 0;

            foreach (VmWord word in words)
            {
                if (dailyRepeatAmount != 0 &&
                    repeatCount >= dailyRepeatAmount)
                {
                    break;
                }

                var path       = Path.Combine(audioPath, word.Name_ru);
                var dictors_en = GetDictors(path, "en", word.Name_en);
                var dictors_ru = GetDictors(path, "ru", word.Name_ru);

                // TODO: check for mp3 too
                // TODO: add english words (fall back to a default audio file)
                if (!dictors_ru.Any())
                {
                    Console.WriteLine("Word has no ru dictors: {0}", word.Name_ru);
                    continue;
                }
                if (!dictors_en.Any())
                {
                    Console.WriteLine("Word has no en dictors: {0}", word.Name_en);
                    continue;
                }

                // TODO: check if audio exists. For now every due collocation is assumed
                // to have its audio file on disk.
                List <VmCollocation> availableCollocations = collocations
                                                             .Where(p => CheckIfContainsPattern(word.Name_en, p.AudioUrl))
                                                             .ToList();

                wordsWithDictors.Add(new VmWordWithDictors
                {
                    Id                      = word.Id,
                    Name_en                 = word.Name_en,
                    Name_ru                 = word.Name_ru,
                    FourDaysLearnPhase      = word.FourDaysLearnPhase,
                    LearnDay                = word.LearnDay,
                    RepeatIterationNum      = word.RepeatIterationNum,
                    NextRepeatDate          = word.NextRepeatDate,
                    DailyReapeatCountForEng = word.DailyReapeatCountForEng,
                    DailyReapeatCountForRus = word.DailyReapeatCountForRus,
                    Dictors_en              = dictors_en,
                    Dictors_ru              = dictors_ru,
                    Collocation             = availableCollocations
                });

                if (dailyRepeatAmount != 0)
                {
                    // Each word is repeated in both directions, hence the factor 2.
                    if (word.FourDaysLearnPhase)
                    {
                        repeatCount += 2 * minReapeatCountPerDayFourDayPhase;
                    }
                    else
                    {
                        repeatCount += 2 * minReapeatCountPerDayIteration;
                    }
                }
            }

            // The list is already complete; wrap it without scheduling a thread-pool task.
            return await Task.FromResult(wordsWithDictors);
        }
        /// <summary>
        /// For every single-word entry, downloads its ldoceonline.com dictionary page, extracts
        /// the example-sentence mp3 links that follow the first "EXAMPLE" marker, saves each
        /// audio file through <c>GetAndSave</c> and registers it as an English collocation.
        /// Afterwards marks duplicate collocations (same AudioUrl) for removal and saves.
        /// </summary>
        /// <remarks>
        /// The page is parsed with plain index arithmetic on the HTML string, so the statement
        /// order inside the loop matters. Spots that look wrong are marked NOTE(review).
        /// </remarks>
        public void Download()
        {
            VmWord[]             words;
            List <VmCollocation> collocations;

            using (var db = new WordContext())
            {
                words = db.Words.Where(p => p.Name_ru.IndexOf(' ') < 0 &&
                                       (p.Name_en.IndexOf(' ') < 0)).ToArray();
            }

            string htmlCode;
            string tempURL;

            foreach (VmWord word in words)
            {
                using (WebClient client = new WebClient())
                {
                    client.Encoding = System.Text.Encoding.UTF8;
                    htmlCode        = client.DownloadString("https://www.ldoceonline.com/dictionary/" + word.Name_en);
                }

                var sourcePath         = "https://www.ldoceonline.com/";
                var urlFirstPart       = "data-src-mp3=\"/";
                var searchStartPattern = "\"EXAMPLE\"";

                var      searchStartIndex = htmlCode.IndexOf(searchStartPattern);
                // NOTE(review): indexesOfUrl is never used, and urls is only appended to.
                var      indexesOfUrl     = new List <int>();
                var      urls             = new List <string>();
                string   lang             = "en";
                int      tagCounter       = 0;
                // nameParts[0]: text of an optional leading nested span; nameParts[1]: the text after it.
                string[] nameParts        = new string[2];

                // Pages without an "EXAMPLE" marker are skipped entirely.
                if (searchStartIndex >= 0)
                {
                    int    urlIndex = searchStartIndex;
                    int    nameTempIndex;
                    int    urlSubstrStart, urlSubstrEnd;
                    string url;
                    string nameSearchStartTag = "/span>";
                    int    nameStartIndex;
                    string nextCharPair, fileName;
                    int    nameEnd;
                    string excludeTagClass = "\"GLOSS\"";
                    // Walk every mp3 attribute located after the current position.
                    while (urlIndex < htmlCode.LastIndexOf(urlFirstPart))
                    {
                        urlSubstrStart = htmlCode.IndexOf(urlFirstPart, urlIndex) + urlFirstPart.Length;
                        // NOTE(review): this check cannot be true. IndexOf returns at least -1 and
                        // the prefix length is added before the comparison.
                        if (urlSubstrStart < 0)
                        {
                            break;
                        }
                        // NOTE(review): the ".mp3" lookup result is not checked. When it is not
                        // found, urlSubstrEnd becomes 3 and the Substring length below is negative.
                        urlSubstrEnd = htmlCode.IndexOf(".mp3", urlSubstrStart) + 4;

                        url = htmlCode.Substring(urlSubstrStart, urlSubstrEnd - urlSubstrStart);

                        urls.Add(url);
                        Console.WriteLine(url);

                        nameTempIndex = urlSubstrEnd;

                        // The sentence text starts right after the next "/span>" (6 characters).
                        nameStartIndex = htmlCode.IndexOf(nameSearchStartTag, nameTempIndex) + 6;
                        nextCharPair   = htmlCode.Substring(nameStartIndex, 2);

                        tagCounter = 0;

                        // "<s" means the text begins with a nested tag; step inside it.
                        if (nextCharPair == "<s")
                        {
                            tagCounter++;
                            nameStartIndex = htmlCode.IndexOf(">", nameStartIndex) + 1;

                            // TODO: remove repetition
                            nextCharPair = htmlCode.Substring(nameStartIndex, 2);
                            if (nextCharPair == "<s")
                            {
                                // NOTE(review): the single-string ArgumentNullException constructor
                                // takes a parameter name, not a message.
                                throw new ArgumentNullException("Unknow tags arangment");
                            }
                        }

                        nameParts[0] = "";
                        nameParts[1] = "";

                        if (tagCounter > 0)
                        {
                            // NOTE(review): tagCounter is reset to 0 and incremented at most once
                            // per iteration, so this branch is unreachable.
                            if (tagCounter > 1)
                            {
                                throw new ArgumentNullException("Unknow tags arangment");
                            }
                            nameEnd  = htmlCode.IndexOf("</span>", nameStartIndex);
                            fileName = htmlCode.Substring(nameStartIndex, nameEnd - nameStartIndex);

                            nameParts[0]   = fileName;
                            // Continue after the closing "</span>" (7 characters).
                            nameStartIndex = nameEnd + 7;
                        }

                        nameEnd  = htmlCode.IndexOf("</span>", nameStartIndex);
                        fileName = htmlCode.Substring(nameStartIndex, nameEnd - nameStartIndex);

                        // TODO: solve hotfixes
                        // COLLOINEXA found there:
                        // I promise not to <span class="COLLOINEXA">disturb anything
                        // The second part is dropped when it contains GLOSS or COLLOINEXA markup.
                        if (fileName.IndexOf(excludeTagClass) < 0 &&
                            fileName.IndexOf("COLLOINEXA") < 0)
                        {
                            nameParts[1] = fileName;
                        }


                        fileName = nameParts[0] + nameParts[1];
                        // The next search starts after the URL just processed.
                        urlIndex = urlSubstrEnd;
                        Console.WriteLine(fileName);

                        using (var db = new WordContext())
                        {
                            tempURL = Path.Combine("/" + "audio", collocationPath, lang, fileName + ".mp3");

                            // An empty name means no usable sentence text was extracted.
                            if (fileName != "")
                            {
                                GetAndSave(sourcePath + url, fileName, lang);
                                var isCollocationExist = db.Collocations.Any(p => p.AudioUrl == tempURL);

                                if (!isCollocationExist)
                                {
                                    db.Collocations.Add(new VmCollocation
                                    {
                                        Lang         = "en",
                                        AudioUrl     = tempURL,
                                        NotUsedToday = true
                                    });
                                }
                                db.SaveChanges();
                            }
                        }
                    }
                }
            }

            using (var db = new WordContext())
            {
                // TODO: check for duplicates
                // Only the last entry of each AudioUrl group is marked for removal.
                var allCollocations = db.Collocations.ToList();
                var duplicates      = allCollocations.Where(p => allCollocations
                                                            .Count(z => z.AudioUrl == p.AudioUrl) > 1)
                                      .GroupBy(j => j.AudioUrl)
                                      .Select(p => p.LastOrDefault()).ToList();

                foreach (VmCollocation collocation in duplicates)
                {
                    db.Collocations.Remove(collocation);
                    Console.WriteLine("Removing duplicate \"{0}\" id {1}", collocation.AudioUrl, collocation.Id);
                }

                collocations = db.Collocations.ToList();
                var collocationsTemp = Directory.GetFiles(Path.Combine(audioPath, collocationPath)).ToList();

                // NOTE(review): All() excludes a file only when every stored collocation has
                // exactly that AudioUrl; Any() may have been intended. The values compared are
                // Directory.GetFiles results against stored AudioUrl strings.
                var collocationsForExclude = collocationsTemp
                                             .Where(p => collocations.All(z => z.AudioUrl == p)).ToList();

                var collocationsNew = collocationsTemp.Except(collocationsForExclude);

                // NOTE(review): these entries are added to the local list, not to
                // db.Collocations, so the SaveChanges below does not insert them.
                foreach (string collocation in collocationsNew)
                {
                    collocations.Add(new VmCollocation {
                        Lang         = "en",
                        AudioUrl     = collocation,
                        NotUsedToday = true,
                    });
                }

                db.SaveChanges();
            }
        }
Beispiel #7
0
        /// <summary>
        /// Extracts the words of "txt/text-for-word-finding.txt", reduces each to its simple
        /// form via <c>GetWordSimpleForm</c>, and writes to <c>newWordsPath</c> those that are
        /// neither stored in the database nor listed in "txt/excluded-words.txt".
        /// </summary>
        /// <remarks>
        /// Both text files are read relative to the current working directory. Tokens
        /// containing '/' are skipped and reported on the console. Comparison with stored
        /// English names and with excluded words is exact (ordinal, case-sensitive). Each new
        /// word is written once, in order of first appearance.
        /// </remarks>
        public void FindNewWords()
        {
            string txtDirectory = Path.Combine(Directory.GetCurrentDirectory(), "txt");

            // RemoveEmptyEntries: consecutive separators (e.g. "\r\n") must not yield "" as a word.
            var excludedWords = new HashSet <string>(
                File.ReadAllText(Path.Combine(txtDirectory, "excluded-words.txt"))
                .Split(new[] { '\r', '\n', ' ' }, StringSplitOptions.RemoveEmptyEntries));

            string input = File.ReadAllText(Path.Combine(txtDirectory, "text-for-word-finding.txt"));

            HashSet <string> knownWords;
            using (var db = new WordContext())
            {
                // Only the English names are needed for the membership test below.
                knownWords = new HashSet <string>(
                    db.Words.Where(p => (p.Name_ru.IndexOf(' ') < 0) &&
                                   (p.Name_en.IndexOf(' ') < 0))
                    .Select(p => p.Name_en));
            }

            var wordsToCheck = new HashSet <string>(
                input.Split(new[] { '\r', '\n', ' ', '.', ',', '!', '?', '(', ')', '[', ']', ':', ';' },
                            StringSplitOptions.RemoveEmptyEntries));

            // The set keeps each new word once.
            var newWords = new HashSet <string>();

            foreach (string word in wordsToCheck)
            {
                if (word.IndexOf("/") >= 0)
                {
                    Console.WriteLine(word + " passed word");
                    continue;
                }

                string simpleForm = GetWordSimpleForm(word);
                if (!knownWords.Contains(simpleForm) && !excludedWords.Contains(simpleForm))
                {
                    newWords.Add(simpleForm);
                }
            }

            using (StreamWriter file = File.CreateText(newWordsPath))
            {
                foreach (string word in newWords)
                {
                    file.WriteLine(word);
                    Console.Write(word);
                }
            }
        }
Beispiel #8
0
        /// <summary>
        /// Downloads pronunciation recordings for every single-word entry that has not been
        /// parsed yet, preferring dictors from "best-dictors.json" and pushing those from
        /// "worst-dictors.json" to the end, then rewrites "parsed-wod-list.json".
        /// </summary>
        /// <remarks>
        /// <para>
        /// A word is requested from the API only when all of the following hold: it is not in
        /// the parsed list, no default mp3/wav exists for it, fewer than
        /// <c>maxDictorsCount</c> dictor folders exist for it, and its name has no '_'.
        /// </para>
        /// <para>
        /// At most <c>maxDictorsCount</c> recordings are saved per word. The parsed list is
        /// written only after all words were processed, so an exception in the loop leaves
        /// the file unchanged.
        /// </para>
        /// </remarks>
        public void Download()
        {
            var            apiPath = Path.Combine(jsonConfigPath, "api-config.json");
            VmParserConfig api     = JsonConvert.DeserializeObject <VmParserConfig>(File.ReadAllText(apiPath));

            var parsedWodListPath = Path.Combine(jsonConfigPath, "parsed-wod-list.json");
            var parsedWods        = JsonConvert.DeserializeObject <List <string> >(File.ReadAllText(parsedWodListPath));

            var bestDictorsPath = Path.Combine(jsonConfigPath, "best-dictors.json");
            var bestDictors     = JsonConvert.DeserializeObject <List <string> >(File.ReadAllText(bestDictorsPath));

            var worstDictorsPath = Path.Combine(jsonConfigPath, "worst-dictors.json");
            var worstDictors     = JsonConvert.DeserializeObject <List <string> >(File.ReadAllText(worstDictorsPath));

            // Hash sets make the per-dictor ranking below a constant-time lookup.
            var bestDictorNames  = new HashSet <string>(bestDictors);
            var worstDictorNames = new HashSet <string>(worstDictors);

            const int maxDictorsCount = 5;

            VmWord[] words;

            using (var db = new WordContext())
            {
                words = db.Words.Where(p => p.Name_ru.IndexOf(' ') < 0 &&
                                       (p.Name_en.IndexOf(' ') < 0)).ToArray();
            }

            foreach (VmWord parserWord in words)
            {
                // TODO: made lang switcher
                var    lang = "en";
                string wordName;

                switch (lang)
                {
                case "ru":
                    wordName = parserWord.Name_ru.ToLower();
                    break;

                case "en":
                    wordName = parserWord.Name_en.ToLower();
                    break;

                default:
                    throw new Exception("lang should be setted");
                }

                Console.WriteLine(audioPath + "/" + parserWord.Name_ru + ".mp3");

                // One sub-directory per dictor already downloaded for this word and language.
                string dictorsDirectory = audioPath + "/" + parserWord.Name_ru + "/" + lang;
                var    existDictors     = 0;
                if (Directory.Exists(dictorsDirectory))
                {
                    existDictors = Directory.GetDirectories(dictorsDirectory).Length;
                }

                var defaultAudioPath = Path.Combine(audioPath, "default", lang);

                bool shouldRequest = parsedWods.IndexOf(wordName) < 0 &&
                                     !File.Exists(defaultAudioPath + "/" + wordName + ".mp3") &&
                                     !File.Exists(defaultAudioPath + "/" + wordName + ".wav") &&
                                     (existDictors < maxDictorsCount) &&
                                     (wordName.IndexOf('_') < 0);
                if (!shouldRequest)
                {
                    continue;
                }

                string wordRequestUrl = api.Url + wordName + "/language/" + lang;
                Console.WriteLine(wordRequestUrl);

                VmResponseWord wordCollection = GetWordColletion(wordRequestUrl);

                // Best dictors first, unlisted ones next, worst last. OrderBy is a stable sort,
                // so the API order is kept inside each group. Ranking each item exactly once
                // also copes with a username listed as both best and worst (best wins).
                List <VmResponseWordItem> sortedDictors = wordCollection.items
                                                          .OrderBy(p => bestDictorNames.Contains(p.username) ? 0
                                                                        : worstDictorNames.Contains(p.username) ? 2
                                                                        : 1)
                                                          .ToList();

                // TODO: add wooordhunt.ru recordings as extra "en" dictors once their mp3
                // existence can be checked:
                //   http://wooordhunt.ru/data/sound/word/uk/mp3/<word>.mp3
                //   http://wooordhunt.ru/data/sound/word/us/mp3/<word>.mp3

                Console.WriteLine("");
                Console.WriteLine("delay");
                Console.WriteLine(wordName);

                int savedCandidates = 0;

                foreach (VmResponseWordItem item in sortedDictors)
                {
                    string dictorLang = item.code;
                    System.Threading.Thread.Sleep(10);

                    if ((item.pathmp3 != null) && ((dictorLang == "en") || (dictorLang == "ru")))
                    {
                        // Strict comparison: exactly maxDictorsCount recordings are saved.
                        if (savedCandidates < maxDictorsCount)
                        {
                            GetAndSave(parserWord.Name_en, parserWord.Name_ru,
                                       wordName, dictorLang, item.pathmp3, item.username);
                        }
                        savedCandidates++;
                    }
                    else
                    {
                        // TODO: write log with words without audio
                        Console.WriteLine("Word \"{0}\" hasn't audio", wordName);
                    }
                }

                parsedWods.Add(wordName);
            }

            using (StreamWriter file = File.CreateText(parsedWodListPath))
            {
                JsonSerializer serializer = new JsonSerializer();
                // Serialize the list directly into the file stream.
                serializer.Serialize(file, parsedWods);
            }
        }