/// <summary>
/// Imports the words returned by <c>GetWordsCollectionFromJson()</c> into the database,
/// inserting only those whose English name is not stored yet.
/// </summary>
/// <remarks>
/// Every inserted word starts in the four-days learn phase with zeroed counters and
/// <c>NextRepeatDate</c> set to today. A line is written to the console for each word
/// (inserted or skipped) and for the number of records saved.
/// </remarks>
public void UpdateDictionary()
{
    VmCurrentWord[] words = GetWordsCollectionFromJson();

    using (var db = new WordContext())
    {
        // Index the English names that are already stored so that each lookup is O(1)
        // instead of a linear scan over every stored word.
        var knownNames = new System.Collections.Generic.HashSet<string>(
            db.Words.Select(stored => stored.Name_en));

        foreach (VmCurrentWord word in words)
        {
            // Add() returns false when the name is already known: either it is stored in the
            // database or it appeared earlier in this same batch. Checking the batch as well
            // keeps a name repeated in the JSON from being inserted twice.
            if (!knownNames.Add(word.Name_en))
            {
                Console.WriteLine("Skipped word \"{0}\"", word.Name_en);
                continue;
            }

            // NOTE(review): Update() is used to insert a brand-new entity; this presumably
            // relies on the key being store-generated - confirm, or switch to Add().
            db.Words.Update(new VmWord
            {
                Name_ru = word.Name_ru,
                Name_en = word.Name_en,
                FourDaysLearnPhase = true,
                LearnDay = 0,
                RepeatIterationNum = 0,
                NextRepeatDate = DateTime.Today,
                DailyReapeatCountForEng = 0,
                DailyReapeatCountForRus = 0
            });
            Console.WriteLine("Updating word \"{0}\"", word.Name_en);
        }

        var count = db.SaveChanges();
        Console.WriteLine("{0} records saved to database", count);
    }
}
// TODO: use better name for VmWordAndCollocationUpdating
/// <summary>
/// Stores the words and collocations sent by the client and reschedules every collocation
/// that was used today.
/// </summary>
/// <param name="wordAndCollocationUpdating">
/// Request body carrying the <c>words</c> and <c>collocations</c> to update. A missing
/// (null) collection is treated as empty.
/// </param>
/// <returns>The literal string <c>"succes"</c>.</returns>
/// <exception cref="ArgumentNullException">
/// The request body is null, or one of the collections contains a null element. Nothing is
/// saved in that case because <c>SaveChanges()</c> is only reached after both loops finish.
/// </exception>
public string Update([FromBody] VmWordAndCollocationUpdating wordAndCollocationUpdating)
{
    if (wordAndCollocationUpdating == null)
    {
        Console.WriteLine("Request body is null");
        throw new ArgumentNullException(nameof(wordAndCollocationUpdating));
    }

    using (var db = new WordContext())
    {
        if (wordAndCollocationUpdating.words != null)
        {
            foreach (VmWord word in wordAndCollocationUpdating.words)
            {
                if (word == null)
                {
                    Console.WriteLine("Word is null");
                    // The two-argument constructor keeps the text as the exception message;
                    // the single-argument one would treat it as a parameter name.
                    throw new ArgumentNullException(nameof(wordAndCollocationUpdating), "Word is null");
                }

                db.Words.Update(word);
                Console.WriteLine("Updating word \"{0}\" id {1}", word.Name_en, word.Id);
            }
        }

        DateTime dateToday = DateTime.Now;
        int collocationDelayPeriod = 4;

        if (wordAndCollocationUpdating.collocations != null)
        {
            foreach (VmCollocation collocation in wordAndCollocationUpdating.collocations)
            {
                if (collocation == null)
                {
                    Console.WriteLine("collocation is null");
                    throw new ArgumentNullException(nameof(wordAndCollocationUpdating), "collocation is null");
                }

                // NotUsedToday == false means the client used the collocation: push its next
                // repetition collocationDelayPeriod days ahead and reset the flag.
                if (collocation.NotUsedToday == false)
                {
                    collocation.NextRepeatDate = dateToday.AddDays(collocationDelayPeriod);
                    collocation.NotUsedToday = true;
                }

                db.Collocations.Update(collocation);
                Console.WriteLine("Updating collocation \"{0}\"", collocation.AudioUrl);
            }
        }

        db.SaveChanges();
    }

    // NOTE(review): "succes" is misspelled, but it is part of the response contract;
    // confirm with the client code before changing it.
    return "succes";
}
/// <summary>
/// Writes a console line for every stored word with a space-free Russian name that has no
/// matching ".wav" file under <c>audioPath</c>.
/// </summary>
/// <returns>Always <c>null</c>.</returns>
public string CheckAudio()
{
    VmWord[] singleWordEntries;
    using (var context = new WordContext())
    {
        singleWordEntries = context.Words
            .Where(p => p.Name_ru.IndexOf(' ') < 0)
            .ToArray();
    }

    var checker = new FileChecker();
    for (int index = 0; index < singleWordEntries.Length; index++)
    {
        string wavPath = Path.Combine(audioPath, singleWordEntries[index].Name_ru) + ".wav";
        if (checker.CheckIfExist(wavPath))
        {
            continue;
        }

        // A missing file is only reported; it does not stop the check.
        Console.WriteLine("File doesn't exist, path: {0}", wavPath);
    }

    return null;
}
/// <summary>
/// Cleans up duplicate words and moves every word that completed today's repetitions to its
/// next repetition date.
/// </summary>
/// <remarks>
/// Three steps run against one context and are saved together at the end:
/// 1. exact duplicates (same <c>Name_ru</c> and <c>Name_en</c>) are removed, keeping one copy;
/// 2. if some word has more than one conflicting entry (same English name with a different
///    Russian name, or the reverse) an exception is thrown, so nothing is saved;
/// 3. words outside the four-days phase that are due and reached
///    <c>minReapeatCountPerDayIteration</c> in both directions get a new
///    <c>NextRepeatDate</c>, zeroed daily counters and an incremented iteration number.
/// </remarks>
/// <exception cref="Exception">Conflicting entries exist and must be resolved manually.</exception>
public void UpdateSchedule()
{
    DateTime dateToday = DateTime.Now;

    using (var db = new WordContext())
    {
        // TODO: move it to separated methods
        // One snapshot of the table; both duplicate checks work on it in memory, so they do
        // not depend on how the LINQ provider evaluates a query that embeds a local array.
        var allWords = db.Words.ToArray();

        // Auto-removing duplicates: keep the first copy of every (Name_ru, Name_en) pair and
        // remove all the others in one pass.
        var extraCopies = allWords
            .GroupBy(w => new { w.Name_ru, w.Name_en })
            .SelectMany(sameWord => sameWord.Skip(1))
            .ToList();
        foreach (VmWord word in extraCopies)
        {
            db.Words.Remove(word);
            Console.WriteLine("Removing duplicate \"{0}\" id {1}", word.Name_en, word.Id);
        }

        // Manually-removing duplicates.
        // NOTE(review): the snapshot still contains the copies removed above (they are not
        // saved yet), and the "> 1" threshold tolerates a single conflicting entry - confirm
        // both are intended.
        bool hasConflicts = allWords.Any(x => allWords
            .Count(n => ((n.Name_ru != x.Name_ru) && (n.Name_en == x.Name_en))
                        || ((n.Name_ru == x.Name_ru) && (n.Name_en != x.Name_en))) > 1);
        if (hasConflicts)
        {
            throw new Exception("There are duplicates thet should be resolved");
        }

        // Renewing Schedule
        VmWord[] renewingIteration = db.Words.Where(p => (p.NextRepeatDate <= dateToday)
            && (p.DailyReapeatCountForEng >= minReapeatCountPerDayIteration)
            && (p.DailyReapeatCountForRus >= minReapeatCountPerDayIteration)
            && (p.FourDaysLearnPhase == false)).ToArray();

        var removedIds = extraCopies.Select(w => w.Id).ToList();
        var iterationIncrement = 7;
        foreach (VmWord word in renewingIteration)
        {
            // The query above still returns rows whose removal is pending. Passing one of
            // them to Update() would replace the pending delete, so they are skipped.
            if (removedIds.Contains(word.Id))
            {
                continue;
            }

            var iteration = iterationIncrement * getIterationLenght(word.RepeatIterationNum);
            word.NextRepeatDate = dateToday.AddDays(iteration);
            word.DailyReapeatCountForEng = 0;
            word.DailyReapeatCountForRus = 0;
            word.RepeatIterationNum++;
            db.Words.Update(word);
            Console.WriteLine("Set new day for repeating word \"{0}\" iterations {1} id {2}",
                word.Name_en, word.RepeatIterationNum, word.Id);
        }

        db.SaveChanges();
    }
}
/// <summary>
/// Builds the list of words that are due for repetition, each with its English and Russian
/// dictors and the due collocations that match the English word.
/// </summary>
/// <remarks>
/// <c>UpdateSchedule()</c> runs first. Only words without spaces in either name are
/// considered, ordered by <c>RepeatIterationNum</c>. Words missing dictors in either language
/// are reported on the console and left out. When the stored <c>DailyRepeatAmount</c> is
/// non-zero, the loop stops once the accumulated repetition count reaches it.
/// </remarks>
/// <returns>The words to repeat; empty when nothing is due.</returns>
public async Task<List<VmWordWithDictors>> GetWords()
{
    VmWord[] words;
    List<VmCollocation> collocations;
    DateTime dateToday = DateTime.Now;
    int? dailyRepeatAmount = 0;

    UpdateSchedule();

    using (var db = new WordContext())
    {
        // Read the settings row once; a missing row or a null amount means "no daily limit".
        var settings = db.Settings.FirstOrDefault();
        if (settings != null && settings.DailyRepeatAmount != null)
        {
            dailyRepeatAmount = settings.DailyRepeatAmount;
        }

        words = db.Words.Where(p => (p.Name_ru.IndexOf(' ') < 0)
            && (p.Name_en.IndexOf(' ') < 0)
            && (p.NextRepeatDate <= dateToday))
            .OrderBy(p => p.RepeatIterationNum).ToArray();
        collocations = db.Collocations.Where(p => p.NextRepeatDate <= dateToday).ToList();
    }

    // TODO: check if audio exists. Every due collocation is currently treated as having audio.
    List<VmCollocation> collocationsWithAudio = collocations;

    var wordsWithDictors = new List<VmWordWithDictors>();
    int repeatCount = 0;

    foreach (VmWord word in words)
    {
        if (dailyRepeatAmount != 0 && repeatCount >= dailyRepeatAmount)
        {
            break;
        }

        var path = Path.Combine(audioPath, word.Name_ru);
        var dictors_en = GetDictors(path, "en", word.Name_en);
        var dictors_ru = GetDictors(path, "ru", word.Name_ru);

        // TODO: check for mp3 too
        if (!dictors_ru.Any())
        {
            Console.WriteLine("Word has no ru dictors: {0}", word.Name_ru);
            continue;
        }

        if (!dictors_en.Any())
        {
            Console.WriteLine("Word has no en dictors: {0}", word.Name_en);
            continue;
        }

        List<VmCollocation> availableCollocations = collocationsWithAudio
            .Where(p => CheckIfContainsPattern(word.Name_en, p.AudioUrl)).ToList();

        wordsWithDictors.Add(new VmWordWithDictors
        {
            Id = word.Id,
            Name_en = word.Name_en,
            Name_ru = word.Name_ru,
            FourDaysLearnPhase = word.FourDaysLearnPhase,
            LearnDay = word.LearnDay,
            RepeatIterationNum = word.RepeatIterationNum,
            NextRepeatDate = word.NextRepeatDate,
            DailyReapeatCountForEng = word.DailyReapeatCountForEng,
            DailyReapeatCountForRus = word.DailyReapeatCountForRus,
            Dictors_en = dictors_en,
            Dictors_ru = dictors_ru,
            Collocation = availableCollocations
        });

        if (dailyRepeatAmount != 0)
        {
            // Each word is repeated in both directions, hence the factor of two.
            int perDirection = word.FourDaysLearnPhase
                ? minReapeatCountPerDayFourDayPhase
                : minReapeatCountPerDayIteration;
            repeatCount = repeatCount + 2 * perDirection;
        }
    }

    // The list is already complete; no thread-pool hop is needed to return it.
    return await Task.FromResult(wordsWithDictors);
}
/// <summary>
/// Downloads example-sentence audio from ldoceonline.com for every stored word whose names
/// contain no spaces, and registers each downloaded file as a collocation.
/// </summary>
/// <remarks>
/// For each word the dictionary page is fetched and scanned from the first <c>"EXAMPLE"</c>
/// marker. Every <c>data-src-mp3</c> link found is saved through <c>GetAndSave</c> under a
/// file name built from the text that follows the link, and a collocation row is added when
/// its audio URL is not stored yet. A page that cannot be downloaded or whose markup cannot
/// be parsed is reported on the console and skipped. Finally, duplicate collocation rows
/// (same audio URL) are removed.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// The text after a link starts with two nested <c>&lt;s…</c> tags, which the parser does
/// not support.
/// </exception>
public void Download()
{
    VmWord[] words;
    using (var db = new WordContext())
    {
        words = db.Words.Where(p => p.Name_ru.IndexOf(' ') < 0 && (p.Name_en.IndexOf(' ') < 0)).ToArray();
    }

    const string sourcePath = "https://www.ldoceonline.com/";
    const string urlFirstPart = "data-src-mp3=\"/";
    const string searchStartPattern = "\"EXAMPLE\"";
    const string lang = "en";

    foreach (VmWord word in words)
    {
        string htmlCode;
        try
        {
            using (WebClient client = new WebClient())
            {
                client.Encoding = System.Text.Encoding.UTF8;
                htmlCode = client.DownloadString("https://www.ldoceonline.com/dictionary/" + word.Name_en);
            }
        }
        catch (WebException ex)
        {
            // One missing or unreachable page must not abort the whole batch.
            Console.WriteLine("Can't download page for \"{0}\": {1}", word.Name_en, ex.Message);
            continue;
        }

        int urlIndex = htmlCode.IndexOf(searchStartPattern, StringComparison.Ordinal);
        if (urlIndex < 0)
        {
            continue;
        }

        // Loop-invariant: position of the last audio link on the page.
        int lastUrlIndex = htmlCode.LastIndexOf(urlFirstPart, StringComparison.Ordinal);
        while (urlIndex < lastUrlIndex)
        {
            string url;
            string fileName;
            int nextIndex;
            if (!TryReadExampleAudio(htmlCode, urlIndex, out url, out fileName, out nextIndex))
            {
                Console.WriteLine("Unexpected markup on the page for \"{0}\", the rest of the page is skipped", word.Name_en);
                break;
            }

            urlIndex = nextIndex;
            Console.WriteLine(url);
            Console.WriteLine(fileName);

            if (fileName == "")
            {
                continue;
            }

            string tempURL = Path.Combine("/" + "audio", collocationPath, lang, fileName + ".mp3");
            GetAndSave(sourcePath + url, fileName, lang);

            using (var db = new WordContext())
            {
                var isCollocationExist = db.Collocations.Any(p => p.AudioUrl == tempURL);
                if (!isCollocationExist)
                {
                    db.Collocations.Add(new VmCollocation
                    {
                        Lang = "en",
                        AudioUrl = tempURL,
                        NotUsedToday = true
                    });
                    db.SaveChanges();
                }
            }
        }
    }

    using (var db = new WordContext())
    {
        // Keep the first row for every audio URL and remove all the other copies.
        var allCollocations = db.Collocations.ToList();
        var duplicates = allCollocations
            .GroupBy(j => j.AudioUrl)
            .SelectMany(sameUrl => sameUrl.Skip(1))
            .ToList();
        foreach (VmCollocation collocation in duplicates)
        {
            db.Collocations.Remove(collocation);
            Console.WriteLine("Removing duplicate \"{0}\" id {1}", collocation.AudioUrl, collocation.Id);
        }

        // NOTE(review): the section below has no persisted effect and is kept unchanged for
        // the owner to decide: new items are added to the local list, not to db.Collocations;
        // All() looks like it was meant to be Any(); and the file-system paths returned by
        // GetFiles presumably never equal the stored "/audio/..." URLs. Confirm the intent
        // before making it write to the database.
        List<VmCollocation> collocations = db.Collocations.ToList();
        var collocationsTemp = Directory.GetFiles(Path.Combine(audioPath, collocationPath)).ToList();
        var collocationsForExclude = collocationsTemp
            .Where(p => collocations.All(z => z.AudioUrl == p)).ToList();
        var collocationsNew = collocationsTemp.Except(collocationsForExclude);
        foreach (string collocation in collocationsNew)
        {
            collocations.Add(new VmCollocation
            {
                Lang = "en",
                AudioUrl = collocation,
                NotUsedToday = true,
            });
        }

        db.SaveChanges();
    }
}

/// <summary>
/// Reads the next audio link at or after <paramref name="searchFrom"/> together with the
/// example text that follows it.
/// </summary>
/// <param name="html">Page markup.</param>
/// <param name="searchFrom">Index to start searching from.</param>
/// <param name="url">Link path without the leading slash, ending with ".mp3".</param>
/// <param name="fileName">
/// Text used as the file name; empty when the text contains a "GLOSS" or "COLLOINEXA" tag
/// and no leading span supplied a prefix.
/// </param>
/// <param name="nextIndex">Index just past the link, where the next search starts.</param>
/// <returns><c>false</c> when an expected marker is missing from the markup.</returns>
private static bool TryReadExampleAudio(string html, int searchFrom, out string url, out string fileName, out int nextIndex)
{
    const string urlFirstPart = "data-src-mp3=\"/";
    const string nameSearchStartTag = "/span>";
    const string spanCloseTag = "</span>";
    const string excludeTagClass = "\"GLOSS\"";

    url = null;
    fileName = "";
    nextIndex = searchFrom;

    // Each marker is tested for "not found" before any offset is added to its index.
    int urlMarker = html.IndexOf(urlFirstPart, searchFrom, StringComparison.Ordinal);
    if (urlMarker < 0)
    {
        return false;
    }

    int urlStart = urlMarker + urlFirstPart.Length;
    int extensionIndex = html.IndexOf(".mp3", urlStart, StringComparison.Ordinal);
    if (extensionIndex < 0)
    {
        return false;
    }

    int urlEnd = extensionIndex + 4;
    url = html.Substring(urlStart, urlEnd - urlStart);

    int tagIndex = html.IndexOf(nameSearchStartTag, urlEnd, StringComparison.Ordinal);
    if (tagIndex < 0)
    {
        return false;
    }

    int nameStart = tagIndex + nameSearchStartTag.Length;

    // The text may begin with one nested <s…> element; its content becomes the name prefix.
    string prefix = "";
    if (string.CompareOrdinal(html, nameStart, "<s", 0, 2) == 0)
    {
        int tagClose = html.IndexOf(">", nameStart, StringComparison.Ordinal);
        if (tagClose < 0)
        {
            return false;
        }

        nameStart = tagClose + 1;
        // TODO: remove repetitation
        if (string.CompareOrdinal(html, nameStart, "<s", 0, 2) == 0)
        {
            // NOTE(review): the exception type is kept for callers; the text ends up as the
            // parameter name rather than the message.
            throw new ArgumentNullException("Unknow tags arangment");
        }

        int prefixEnd = html.IndexOf(spanCloseTag, nameStart, StringComparison.Ordinal);
        if (prefixEnd < 0)
        {
            return false;
        }

        prefix = html.Substring(nameStart, prefixEnd - nameStart);
        nameStart = prefixEnd + spanCloseTag.Length;
    }

    int nameEnd = html.IndexOf(spanCloseTag, nameStart, StringComparison.Ordinal);
    if (nameEnd < 0)
    {
        return false;
    }

    string tail = html.Substring(nameStart, nameEnd - nameStart);

    // TODO: solve hotfixes
    // COLLOINEXA found there:
    // I promise not to <span class="COLLOINEXA">disturb anything
    bool tailHasMarkup = tail.IndexOf(excludeTagClass, StringComparison.Ordinal) >= 0
        || tail.IndexOf("COLLOINEXA", StringComparison.Ordinal) >= 0;

    fileName = prefix + (tailHasMarkup ? "" : tail);
    nextIndex = urlEnd;
    return true;
}
/// <summary>
/// Extracts the words of "txt/text-for-word-finding.txt" that are neither stored in the
/// database nor listed in "txt/excluded-words.txt" and writes them, one per line, to
/// <c>newWordsPath</c>.
/// </summary>
/// <remarks>
/// Both files are read from the "txt" folder under the current directory. Tokens containing
/// "/" are reported on the console and skipped; every other token is passed through
/// <c>GetWordSimpleForm</c> (presumably a base-form normalizer - see that method) before it
/// is compared. Each new word is written once, in order of first appearance, and echoed to
/// the console.
/// </remarks>
public void FindNewWords()
{
    char[] excludedSeparators = { '\r', '\n', ' ' };
    char[] textSeparators = { '\r', '\n', ' ', '.', ',', '!', '?', '(', ')', '[', ']', ':', ';' };

    string excludedWordsFile = File.ReadAllText(
        Path.Combine(Directory.GetCurrentDirectory(), "txt", "excluded-words.txt"));
    // RemoveEmptyEntries: consecutive separators (for example "\r\n") must not produce
    // empty "words".
    var excludedWords = new HashSet<string>(
        excludedWordsFile.Split(excludedSeparators, StringSplitOptions.RemoveEmptyEntries));

    string newWordsFile = Path.Combine(Directory.GetCurrentDirectory(), "txt", "text-for-word-finding.txt");
    string input = File.ReadAllText(newWordsFile);

    // Hash sets make each membership test O(1) instead of a scan over the whole list.
    HashSet<string> knownWords;
    using (var db = new WordContext())
    {
        VmWord[] wordsDB = db.Words.Where(p => (p.Name_ru.IndexOf(' ') < 0)
            && (p.Name_en.IndexOf(' ') < 0)).ToArray();
        knownWords = new HashSet<string>(wordsDB.Select(p => p.Name_en));
    }

    var newWords = new HashSet<string>();
    foreach (string word in input.Split(textSeparators, StringSplitOptions.RemoveEmptyEntries).Distinct())
    {
        if (word.IndexOf('/') >= 0)
        {
            Console.WriteLine(word + " passed word");
            continue;
        }

        string simpleForm = GetWordSimpleForm(word);
        if (string.IsNullOrEmpty(simpleForm)
            || knownWords.Contains(simpleForm)
            || excludedWords.Contains(simpleForm))
        {
            continue;
        }

        newWords.Add(simpleForm);
    }

    using (StreamWriter file = File.CreateText(newWordsPath))
    {
        foreach (string word in newWords)
        {
            file.WriteLine(word);
            Console.WriteLine(word);
        }
    }
}
/// <summary>
/// Downloads pronunciation audio for every stored word whose names contain no spaces,
/// preferring the dictors listed in "best-dictors.json" and using those in
/// "worst-dictors.json" last.
/// </summary>
/// <remarks>
/// Configuration is read from <c>jsonConfigPath</c>. A word is requested only when it is not
/// in "parsed-wod-list.json", has no default ".mp3"/".wav" file, has fewer than five dictor
/// folders and contains no underscore. At most five audio files are saved per word. The
/// parsed-word list is written back even when the run is interrupted by an exception, so
/// finished words are not requested again.
/// </remarks>
public void Download()
{
    var apiPath = Path.Combine(jsonConfigPath, "api-config.json");
    VmParserConfig api = JsonConvert.DeserializeObject<VmParserConfig>(File.ReadAllText(apiPath));

    // A file holding JSON "null" deserializes to null; treat it as an empty list.
    var parsedWodListPath = Path.Combine(jsonConfigPath, "parsed-wod-list.json");
    var parsedWods = JsonConvert.DeserializeObject<List<string>>(File.ReadAllText(parsedWodListPath))
        ?? new List<string>();
    var bestDictorsPath = Path.Combine(jsonConfigPath, "best-dictors.json");
    var bestDictors = JsonConvert.DeserializeObject<List<string>>(File.ReadAllText(bestDictorsPath))
        ?? new List<string>();
    var worstDictorsPath = Path.Combine(jsonConfigPath, "worst-dictors.json");
    var worstDictors = JsonConvert.DeserializeObject<List<string>>(File.ReadAllText(worstDictorsPath))
        ?? new List<string>();

    // Sets for O(1) membership tests; parsedWods itself stays a list because it is the
    // object serialized back to disk.
    var alreadyParsed = new HashSet<string>(parsedWods);
    var bestSet = new HashSet<string>(bestDictors);
    var worstSet = new HashSet<string>(worstDictors);

    VmWord[] words;
    using (var db = new WordContext())
    {
        words = db.Words.Where(p => p.Name_ru.IndexOf(' ') < 0 && (p.Name_en.IndexOf(' ') < 0)).ToArray();
    }

    try
    {
        foreach (VmWord parserWord in words)
        {
            var lang = "en";
            string wordName;
            switch (lang)
            {
                case "ru":
                    wordName = parserWord.Name_ru.ToLowerInvariant();
                    break;
                case "en":
                    wordName = parserWord.Name_en.ToLowerInvariant();
                    break;
                default:
                    throw new Exception("lang should be setted");
            }

            Console.WriteLine(audioPath + "/" + parserWord.Name_ru + ".mp3");

            var maxDictorsCount = 5;
            var existDictors = 0;
            var dictorsFolder = audioPath + "/" + parserWord.Name_ru + "/" + lang;
            if (Directory.Exists(dictorsFolder))
            {
                existDictors = Directory.GetDirectories(dictorsFolder).Length;
            }

            var defaultAudioPath = Path.Combine(audioPath, "default", lang);
            bool shouldRequest = !alreadyParsed.Contains(wordName)
                && !File.Exists(defaultAudioPath + "/" + wordName + ".mp3")
                && !File.Exists(defaultAudioPath + "/" + wordName + ".wav")
                && (existDictors < maxDictorsCount)
                && (wordName.IndexOf('_') < 0);
            if (!shouldRequest)
            {
                continue;
            }

            string wordRequestUrl = api.Url + wordName + "/language/" + lang;
            Console.WriteLine(wordRequestUrl);
            VmResponseWord wordCollection = GetWordColletion(wordRequestUrl);
            if (wordCollection == null || wordCollection.items == null)
            {
                // Not marked as parsed, so the word is requested again on the next run.
                Console.WriteLine("No response for word \"{0}\"", wordName);
                continue;
            }

            // Best dictors first, then the unlisted ones, then the worst. OrderBy is stable,
            // so the response order is kept inside each group. A user on both lists counts
            // as best and appears once.
            VmResponseWordItem[] sortedDictors = wordCollection.items
                .OrderBy(p => bestSet.Contains(p.username) ? 0 : (worstSet.Contains(p.username) ? 2 : 1))
                .ToArray();

            Console.WriteLine("");
            Console.WriteLine("delay");
            Console.WriteLine(wordName);

            int i = 0;
            foreach (VmResponseWordItem item in sortedDictors)
            {
                string dictorLang = item.code;
                System.Threading.Thread.Sleep(10);
                if ((item.pathmp3 != null) && ((dictorLang == "en") || (dictorLang == "ru")))
                {
                    // TODO: made lang switcher
                    // Strictly less than: i counts from zero, so "<=" would save one file
                    // more than maxDictorsCount.
                    if (i < maxDictorsCount)
                    {
                        GetAndSave(parserWord.Name_en, parserWord.Name_ru, wordName, dictorLang, item.pathmp3, item.username);
                    }

                    i++;
                }
                else
                {
                    // TODO: write log with words without audio
                    Console.WriteLine("Word \"{0}\" hasn't audio", wordName);
                }
            }

            parsedWods.Add(wordName);
            alreadyParsed.Add(wordName);
        }
    }
    finally
    {
        // Persist progress even when a request fails half-way through the batch.
        using (StreamWriter file = File.CreateText(parsedWodListPath))
        {
            JsonSerializer serializer = new JsonSerializer();
            // serialize object directly into file stream
            serializer.Serialize(file, parsedWods);
        }
    }
}